#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Unit tests for loading the experiment metadata."""

from mlos_bench.storage.base_experiment_data import ExperimentData
from mlos_bench.storage.base_storage import Storage
from mlos_bench.tests.storage import (
    CONFIG_COUNT,
    CONFIG_TRIAL_REPEAT_COUNT,
    MAX_TRIALS,
    TRIAL_RUNNER_COUNT,
)
from mlos_bench.tunables.tunable_groups import TunableGroups
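# NOTE: The constants imported above come from the shared storage test fixtures
# (mlos_bench.tests.storage) and describe the shape of the test experiment:
# CONFIG_COUNT distinct tunable configs, each repeated CONFIG_TRIAL_REPEAT_COUNT
# times, MAX_TRIALS trials in total, spread across TRIAL_RUNNER_COUNT trial runners.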

def test_load_empty_exp_data(storage: Storage, exp_storage: Storage.Experiment) -> None:
    """Try to retrieve old experimental data from the empty storage."""
    exp = storage.experiments[exp_storage.experiment_id]
    assert exp.experiment_id == exp_storage.experiment_id
    assert exp.description == exp_storage.description
    assert exp.objectives == exp_storage.opt_targets
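    # Even before any trials have run, the experiment's own metadata (id, description,
    # objectives) should round-trip through the storage layer.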

def test_exp_data_root_env_config(
    exp_storage: Storage.Experiment,
    exp_data: ExperimentData,
) -> None:
    """Tests the root_env_config property of ExperimentData."""
    # pylint: disable=protected-access
    assert exp_data.root_env_config == (
        exp_storage._root_env_config,
        exp_storage._git_repo,
        exp_storage._git_commit,
    )
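    # i.e., the tuple should mirror the root environment config path plus the git repo
    # and commit that the Experiment recorded at setup time (hence the protected-member
    # access on exp_storage above).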

def test_exp_trial_data_objectives(
    storage: Storage,
    exp_storage: Storage.Experiment,
    tunable_groups: TunableGroups,
) -> None:
    """Start a new trial and check the storage for the trial data."""

    trial_opt_new = exp_storage.new_trial(
        tunable_groups,
        config={
            "opt_target": "some-other-target",
            "opt_direction": "max",
        },
    )
    assert trial_opt_new.config() == {
        "experiment_id": exp_storage.experiment_id,
        "trial_id": trial_opt_new.trial_id,
        "opt_target": "some-other-target",
        "opt_direction": "max",
    }
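    # The trial's config merges the identifying fields (experiment_id, trial_id) with
    # whatever per-trial settings were passed to new_trial() above.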

    trial_opt_old = exp_storage.new_trial(
        tunable_groups,
        config={
            "opt_target": "back-compat",
            # "opt_direction": "max",  # missing
        },
    )
    assert trial_opt_old.config() == {
        "experiment_id": exp_storage.experiment_id,
        "trial_id": trial_opt_old.trial_id,
        "opt_target": "back-compat",
    }
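    # When "opt_direction" is omitted (the older, back-compat config format), the stored
    # trial config simply has no such key rather than a filled-in default.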

    exp = storage.experiments[exp_storage.experiment_id]
    assert exp.objectives == exp_storage.opt_targets

    trial_data_opt_new = exp.trials[trial_opt_new.trial_id]
    assert trial_data_opt_new.metadata_dict == {
        "opt_target": "some-other-target",
        "opt_direction": "max",
    }
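    # The per-trial optimizer settings passed to new_trial() surface again as the trial's
    # metadata_dict when read back, while the experiment-level objectives stay unchanged.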

def test_exp_data_results_df(exp_data: ExperimentData, tunable_groups: TunableGroups) -> None:
    """Tests the results_df property of ExperimentData."""
    results_df = exp_data.results_df
    expected_trials_count = MAX_TRIALS
    assert len(results_df) == expected_trials_count
    assert len(results_df["tunable_config_id"].unique()) == CONFIG_COUNT
    assert len(results_df["trial_id"].unique()) == expected_trials_count
    obj_target = next(iter(exp_data.objectives))
    assert (
        len(results_df[ExperimentData.RESULT_COLUMN_PREFIX + obj_target]) == expected_trials_count
    )
    (tunable, _covariant_group) = next(iter(tunable_groups))
    assert (
        len(results_df[ExperimentData.CONFIG_COLUMN_PREFIX + tunable.name])
        == expected_trials_count
    )
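    # results_df flattens each trial into one row: tunable values appear under
    # CONFIG_COLUMN_PREFIX-prefixed columns and objective results under
    # RESULT_COLUMN_PREFIX-prefixed columns, alongside ids such as trial_id and
    # tunable_config_id.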

def test_exp_no_tunables_data_results_df(exp_no_tunables_data: ExperimentData) -> None:
    """Tests the results_df property of ExperimentData when there are no tunables."""
    results_df = exp_no_tunables_data.results_df
    expected_trials_count = CONFIG_COUNT * CONFIG_TRIAL_REPEAT_COUNT
    assert len(results_df) == expected_trials_count
    assert len(results_df["trial_id"].unique()) == expected_trials_count
    obj_target = next(iter(exp_no_tunables_data.objectives))
    assert (
        len(results_df[ExperimentData.RESULT_COLUMN_PREFIX + obj_target]) == expected_trials_count
    )
    assert not results_df.columns.str.startswith(ExperimentData.CONFIG_COLUMN_PREFIX).any()
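    # With no tunables defined, no config-prefixed columns should be emitted at all.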

def test_exp_data_tunable_config_trial_group_id_in_results_df(exp_data: ExperimentData) -> None:
    """
    Tests the tunable_config_trial_group_id property of ExperimentData.results_df.

    See Also: test_exp_trial_data_tunable_config_trial_group_id()
    """
    results_df = exp_data.results_df

    # First three trials should use the same config.
    trial_1_df = results_df.loc[(results_df["trial_id"] == 1)]
    assert len(trial_1_df) == 1
    assert trial_1_df["tunable_config_id"].iloc[0] == 1
    assert trial_1_df["tunable_config_trial_group_id"].iloc[0] == 1

    trial_2_df = results_df.loc[(results_df["trial_id"] == 2)]
    assert len(trial_2_df) == 1
    assert trial_2_df["tunable_config_id"].iloc[0] == 1
    assert trial_2_df["tunable_config_trial_group_id"].iloc[0] == 1

    # The fourth trial should use a new config.
    trial_4_df = results_df.loc[(results_df["trial_id"] == 4)]
    assert len(trial_4_df) == 1
    assert trial_4_df["tunable_config_id"].iloc[0] == 2
    assert trial_4_df["tunable_config_trial_group_id"].iloc[0] == 4

    # And so on ...
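    # In other words, each trial's tunable_config_trial_group_id is the lowest trial_id
    # that ran the same tunable config (trial 4 starts a new group, so its group id is 4).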

def test_exp_data_tunable_config_trial_groups(exp_data: ExperimentData) -> None:
    """
    Tests the tunable_config_trial_groups property of ExperimentData.

    This tests bulk loading of the tunable_config_trial_groups.
    """
    # Should be keyed by config_id.
    assert list(exp_data.tunable_config_trial_groups.keys()) == list(range(1, CONFIG_COUNT + 1))
    # Which should match the objects.
    assert [
        config_trial_group.tunable_config_id
        for config_trial_group in exp_data.tunable_config_trial_groups.values()
    ] == list(range(1, CONFIG_COUNT + 1))
    # And the tunable_config_trial_group_id should also match the minimum trial_id.
    assert [
        config_trial_group.tunable_config_trial_group_id
        for config_trial_group in exp_data.tunable_config_trial_groups.values()
    ] == list(range(1, CONFIG_COUNT * CONFIG_TRIAL_REPEAT_COUNT, CONFIG_TRIAL_REPEAT_COUNT))
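    # i.e., the group ids advance in steps of CONFIG_TRIAL_REPEAT_COUNT:
    # 1, 1 + CONFIG_TRIAL_REPEAT_COUNT, 1 + 2 * CONFIG_TRIAL_REPEAT_COUNT, ...
    # since each config's group starts at the first trial_id that used that config.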

def test_exp_data_tunable_configs(exp_data: ExperimentData) -> None:
    """Tests the tunable_configs property of ExperimentData."""
    # Should be keyed by config_id.
    assert list(exp_data.tunable_configs.keys()) == list(range(1, CONFIG_COUNT + 1))
    # Which should match the objects.
    assert [config.tunable_config_id for config in exp_data.tunable_configs.values()] == list(
        range(1, CONFIG_COUNT + 1)
    )

def test_exp_data_default_config_id(exp_data: ExperimentData) -> None:
    """Tests the default_tunable_config_id property of ExperimentData."""
    assert exp_data.default_tunable_config_id == 1
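    # (Presumably the test experiment starts from the default tunable values, so the
    # default config is the first one recorded and therefore gets config id 1.)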

def test_trial_runner_id_results_df_column(exp_data: ExperimentData) -> None:
    """Ensure the results_df has the expected trial_runner_id column as well."""
    assert exp_data.results_df["trial_runner_id"].isna().sum() == 0
    assert set(exp_data.results_df["trial_runner_id"].unique()) == set(
        range(1, TRIAL_RUNNER_COUNT + 1)
    )
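    # Every trial should have a trial_runner_id assigned (no NaNs), and together the
    # trials should cover all runner ids 1..TRIAL_RUNNER_COUNT.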