Coverage for mlos_core/mlos_core/data_classes.py: 93%

149 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-21 01:50 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5""" 

6Data classes for ``mlos_core`` used to pass around configurations, observations, and 

7suggestions. 

8 

9``mlos_core`` uses :external:py:mod:`pandas` :external:py:class:`~pandas.DataFrame` 

10s and :external:py:class:`~pandas.Series` to represent configurations and scores and 

11context (information about where the configuration was evaluated). 

12 

13These modules encapsulate tuples of those for easier passing around and manipulation. 

14""" 

15from collections.abc import Iterable, Iterator 

16from typing import Any 

17 

18import pandas as pd 

19from ConfigSpace import Configuration, ConfigurationSpace 

20 

21from mlos_core.util import compare_optional_dataframe, compare_optional_series 

22 

23 

24class Observation: 

25 """A single observation of a configuration.""" 

26 

27 def __init__( 

28 self, 

29 *, 

30 config: pd.Series, 

31 score: pd.Series = pd.Series(), 

32 context: pd.Series | None = None, 

33 metadata: pd.Series | None = None, 

34 ): 

35 """ 

36 Creates a new Observation object. 

37 

38 Parameters 

39 ---------- 

40 config : pandas.Series 

41 The configuration observed. 

42 score : pandas.Series 

43 The score metrics observed. 

44 context : pandas.Series | None 

45 The context in which the configuration was evaluated. 

46 Not Yet Implemented. 

47 metadata: pandas.Series | None 

48 The metadata in which the configuration was evaluated 

49 """ 

50 self._config = config 

51 self._score = score 

52 self._context = context 

53 self._metadata = metadata 

54 

55 @property 

56 def config(self) -> pd.Series: 

57 """Gets (a copy of) the config of the Observation.""" 

58 return self._config.copy() 

59 

60 @property 

61 def score(self) -> pd.Series: 

62 """Gets (a copy of) the score of the Observation.""" 

63 return self._score.copy() 

64 

65 @property 

66 def context(self) -> pd.Series | None: 

67 """Gets (a copy of) the context of the Observation.""" 

68 return self._context.copy() if self._context is not None else None 

69 

70 @property 

71 def metadata(self) -> pd.Series | None: 

72 """Gets (a copy of) the metadata of the Observation.""" 

73 return self._metadata.copy() if self._metadata is not None else None 

74 

75 def to_suggestion(self) -> "Suggestion": 

76 """ 

77 Converts the observation to a suggestion. 

78 

79 Returns 

80 ------- 

81 Suggestion 

82 The suggestion. 

83 """ 

84 return Suggestion( 

85 config=self.config, 

86 context=self.context, 

87 metadata=self.metadata, 

88 ) 

89 

90 def __repr__(self) -> str: 

91 return ( 

92 f"Observation(config={self._config}, score={self._score}, " 

93 f"context={self._context}, metadata={self._metadata})" 

94 ) 

95 

96 def __eq__(self, other: Any) -> bool: 

97 if not isinstance(other, Observation): 

98 return False 

99 

100 if not self._config.equals(other._config): 

101 return False 

102 if not self._score.equals(other._score): 

103 return False 

104 if not compare_optional_series(self._context, other._context): 

105 return False 

106 if not compare_optional_series(self._metadata, other._metadata): 

107 return False 

108 

109 return True 

110 

111 def __ne__(self, other: Any) -> bool: 

112 return not self.__eq__(other) 

113 

114 

115class Observations: 

116 """A set of observations of a configuration scores.""" 

117 

118 def __init__( # pylint: disable=too-many-arguments 

119 self, 

120 *, 

121 configs: pd.DataFrame = pd.DataFrame(), 

122 scores: pd.DataFrame = pd.DataFrame(), 

123 contexts: pd.DataFrame | None = None, 

124 metadata: pd.DataFrame | None = None, 

125 observations: Iterable[Observation] | None = None, 

126 ): 

127 """ 

128 Creates a new Observation object. 

129 

130 Can accept either a set of Observations or a collection of aligned config and 

131 score (and optionally context) dataframes. 

132 

133 If both are provided the two sets will be merged. 

134 

135 Parameters 

136 ---------- 

137 configs : pandas.DataFrame 

138 Pandas dataframe containing configurations. Column names are the parameter names. 

139 scores : pandas.DataFrame 

140 The score metrics observed in a dataframe. 

141 contexts : pandas.DataFrame | None 

142 The context in which the configuration was evaluated. 

143 Not Yet Implemented. 

144 metadata: pandas.DataFrame | None 

145 The metadata in which the configuration was evaluated 

146 Not Yet Implemented. 

147 """ 

148 if observations is None: 

149 observations = [] 

150 if any(observations): 

151 configs = pd.concat([obs.config.to_frame().T for obs in observations]) 

152 scores = pd.concat([obs.score.to_frame().T for obs in observations]) 

153 

154 if sum(obs.context is None for obs in observations) == 0: 

155 contexts = pd.concat( 

156 [obs.context.to_frame().T for obs in observations] # type: ignore[union-attr] 

157 ) 

158 else: 

159 contexts = None 

160 if sum(obs.metadata is None for obs in observations) == 0: 

161 metadata = pd.concat( 

162 [obs.metadata.to_frame().T for obs in observations] # type: ignore[union-attr] 

163 ) 

164 else: 

165 metadata = None 

166 assert len(configs.index) == len( 

167 scores.index 

168 ), "config and score must have the same length" 

169 if contexts is not None: 

170 assert len(configs.index) == len( 

171 contexts.index 

172 ), "config and context must have the same length" 

173 if metadata is not None: 

174 assert len(configs.index) == len( 

175 metadata.index 

176 ), "config and metadata must have the same length" 

177 self._configs = configs.reset_index(drop=True) 

178 self._scores = scores.reset_index(drop=True) 

179 self._contexts = None if contexts is None else contexts.reset_index(drop=True) 

180 self._metadata = None if metadata is None else metadata.reset_index(drop=True) 

181 

182 @property 

183 def configs(self) -> pd.DataFrame: 

184 """Gets a copy of the configs of the Observations.""" 

185 return self._configs.copy() 

186 

187 @property 

188 def scores(self) -> pd.DataFrame: 

189 """Gets a copy of the scores of the Observations.""" 

190 return self._scores.copy() 

191 

192 @property 

193 def contexts(self) -> pd.DataFrame | None: 

194 """Gets a copy of the contexts of the Observations.""" 

195 return self._contexts.copy() if self._contexts is not None else None 

196 

197 @property 

198 def metadata(self) -> pd.DataFrame | None: 

199 """Gets a copy of the metadata of the Observations.""" 

200 return self._metadata.copy() if self._metadata is not None else None 

201 

202 def filter_by_index(self, index: pd.Index) -> "Observations": 

203 """ 

204 Filters the observation by the given indices. 

205 

206 Parameters 

207 ---------- 

208 index : pandas.Index 

209 The indices to filter by. 

210 

211 Returns 

212 ------- 

213 Observation 

214 The filtered observation. 

215 """ 

216 return Observations( 

217 configs=self._configs.loc[index].copy(), 

218 scores=self._scores.loc[index].copy(), 

219 contexts=None if self._contexts is None else self._contexts.loc[index].copy(), 

220 metadata=None if self._metadata is None else self._metadata.loc[index].copy(), 

221 ) 

222 

223 def append(self, observation: Observation) -> None: 

224 """ 

225 Appends the given observation to this observation. 

226 

227 Parameters 

228 ---------- 

229 observation : Observation 

230 The observation to append. 

231 """ 

232 config = observation.config.to_frame().T 

233 score = observation.score.to_frame().T 

234 context = None if observation.context is None else observation.context.to_frame().T 

235 metadata = None if observation.metadata is None else observation.metadata.to_frame().T 

236 if len(self._configs.index) == 0: 

237 self._configs = config 

238 self._scores = score 

239 self._contexts = context 

240 self._metadata = metadata 

241 assert set(self.configs.index) == set( 

242 self.scores.index 

243 ), "config and score must have the same index" 

244 return 

245 

246 self._configs = pd.concat([self._configs, config]).reset_index(drop=True) 

247 self._scores = pd.concat([self._scores, score]).reset_index(drop=True) 

248 assert set(self.configs.index) == set( 

249 self.scores.index 

250 ), "config and score must have the same index" 

251 

252 if self._contexts is not None: 

253 assert context is not None, ( 

254 "context of appending observation must not be null " 

255 "if context of prior observation is not null" 

256 ) 

257 self._contexts = pd.concat([self._contexts, context]).reset_index(drop=True) 

258 assert self._configs.index.equals( 

259 self._contexts.index 

260 ), "config and context must have the same index" 

261 else: 

262 assert context is None, ( 

263 "context of appending observation must be null " 

264 "if context of prior observation is null" 

265 ) 

266 if self._metadata is not None: 

267 assert metadata is not None, ( 

268 "context of appending observation must not be null " 

269 "if metadata of prior observation is not null" 

270 ) 

271 self._metadata = pd.concat([self._metadata, metadata]).reset_index(drop=True) 

272 assert self._configs.index.equals( 

273 self._metadata.index 

274 ), "config and metadata must have the same index" 

275 else: 

276 assert metadata is None, ( 

277 "context of appending observation must be null " 

278 "if metadata of prior observation is null" 

279 ) 

280 

281 def __len__(self) -> int: 

282 return len(self._configs.index) 

283 

284 def __iter__(self) -> Iterator["Observation"]: 

285 for idx in self._configs.index: 

286 yield Observation( 

287 config=self._configs.loc[idx], 

288 score=self._scores.loc[idx], 

289 context=None if self._contexts is None else self._contexts.loc[idx], 

290 metadata=None if self._metadata is None else self._metadata.loc[idx], 

291 ) 

292 

293 def __repr__(self) -> str: 

294 return ( 

295 f"Observation(configs={self._configs}, score={self._scores}, " 

296 "contexts={self._contexts}, metadata={self._metadata})" 

297 ) 

298 

299 def __eq__(self, other: Any) -> bool: 

300 if not isinstance(other, Observations): 

301 return False 

302 

303 if not self._configs.equals(other._configs): 

304 return False 

305 if not self._scores.equals(other._scores): 

306 return False 

307 if not compare_optional_dataframe(self._contexts, other._contexts): 

308 return False 

309 if not compare_optional_dataframe(self._metadata, other._metadata): 

310 return False 

311 

312 return True 

313 

314 # required as per: https://stackoverflow.com/questions/30643236/does-ne-use-an-overridden-eq 

315 def __ne__(self, other: Any) -> bool: 

316 return not self.__eq__(other) 

317 

318 

319class Suggestion: 

320 """ 

321 A single suggestion for a configuration. 

322 

323 A Suggestion is an Observation that has not yet been scored. Evaluating the 

324 Suggestion and calling `complete(scores)` can convert it to an Observation. 

325 """ 

326 

327 def __init__( 

328 self, 

329 *, 

330 config: pd.Series, 

331 context: pd.Series | None = None, 

332 metadata: pd.Series | None = None, 

333 ): 

334 """ 

335 Creates a new Suggestion. 

336 

337 Parameters 

338 ---------- 

339 config : pandas.Series 

340 The configuration suggested. 

341 context : pandas.Series | None 

342 The context for this suggestion, by default None 

343 metadata : pandas.Series | None 

344 Any metadata provided by the underlying optimizer, by default None 

345 """ 

346 self._config = config 

347 self._context = context 

348 self._metadata = metadata 

349 

350 @property 

351 def config(self) -> pd.Series: 

352 """Gets (a copy of) the config of the Suggestion.""" 

353 return self._config.copy() 

354 

355 @property 

356 def context(self) -> pd.Series | None: 

357 """Gets (a copy of) the context of the Suggestion.""" 

358 return self._context.copy() if self._context is not None else None 

359 

360 @property 

361 def metadata(self) -> pd.Series | None: 

362 """Gets (a copy of) the metadata of the Suggestion.""" 

363 return self._metadata.copy() if self._metadata is not None else None 

364 

365 def complete(self, score: pd.Series) -> Observation: 

366 """ 

367 Completes the Suggestion by adding a score to turn it into an Observation. 

368 

369 Parameters 

370 ---------- 

371 score : pandas.Series 

372 The score metrics observed. 

373 

374 Returns 

375 ------- 

376 Observation 

377 The observation of the suggestion. 

378 """ 

379 return Observation( 

380 config=self.config, 

381 score=score, 

382 context=self.context, 

383 metadata=self.metadata, 

384 ) 

385 

386 def to_configspace_config(self, space: ConfigurationSpace) -> Configuration: 

387 """ 

388 Convert a Configuration Space to a Configuration. 

389 

390 Parameters 

391 ---------- 

392 space : ConfigSpace.ConfigurationSpace 

393 The ConfigurationSpace to be converted. 

394 

395 Returns 

396 ------- 

397 ConfigSpace.Configuration 

398 The output Configuration. 

399 """ 

400 return Configuration(space, values=self._config.dropna().to_dict()) 

401 

402 def __repr__(self) -> str: 

403 return ( 

404 f"Suggestion(config={self._config}, context={self._context}, " 

405 "metadata={self._metadata})" 

406 ) 

407 

408 def __eq__(self, other: Any) -> bool: 

409 if not isinstance(other, Suggestion): 

410 return False 

411 

412 if not self._config.equals(other._config): 

413 return False 

414 if not compare_optional_series(self._context, other._context): 

415 return False 

416 if not compare_optional_series(self._metadata, other._metadata): 

417 return False 

418 

419 return True 

420 

421 def __ne__(self, other: Any) -> bool: 

422 return not self.__eq__(other)