Coverage for mlos_core/mlos_core/data_classes.py: 93%

148 statements  

« prev     ^ index     » next       coverage.py v7.6.9, created at 2024-12-14 01:58 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5""" 

6Data classes for ``mlos_core`` used to pass around configurations, observations, and 

7suggestions. 

8 

9``mlos_core`` uses :external:py:mod:`pandas` :external:py:class:`~pandas.DataFrame` 

10s and :external:py:class:`~pandas.Series` to represent configurations and scores and 

11context (information about where the configuration was evaluated). 

12 

13These modules encapsulate tuples of those for easier passing around and manipulation. 

14""" 

15from typing import Any, Iterable, Iterator, Optional 

16 

17import pandas as pd 

18from ConfigSpace import Configuration, ConfigurationSpace 

19 

20from mlos_core.util import compare_optional_dataframe, compare_optional_series 

21 

22 

class Observation:
    """A single observation of a configuration."""

    def __init__(
        self,
        *,
        config: pd.Series,
        score: Optional[pd.Series] = None,
        context: Optional[pd.Series] = None,
        metadata: Optional[pd.Series] = None,
    ):
        """
        Creates a new Observation object.

        Parameters
        ----------
        config : pandas.Series
            The configuration observed.
        score : Optional[pandas.Series]
            The score metrics observed.
            When omitted (or None) a new, empty Series is used.
        context : Optional[pandas.Series]
            The context in which the configuration was evaluated.
            Not Yet Implemented.
        metadata: Optional[pandas.Series]
            The metadata in which the configuration was evaluated
        """
        self._config = config
        # The empty default is created here rather than in the signature: a default
        # in the signature is evaluated once, so every unscored Observation would
        # otherwise hold a reference to the very same Series object.
        self._score = pd.Series() if score is None else score
        self._context = context
        self._metadata = metadata

    @property
    def config(self) -> pd.Series:
        """Gets (a copy of) the config of the Observation."""
        return self._config.copy()

    @property
    def score(self) -> pd.Series:
        """Gets (a copy of) the score of the Observation."""
        return self._score.copy()

    @property
    def context(self) -> Optional[pd.Series]:
        """Gets (a copy of) the context of the Observation, or None if unset."""
        return self._context.copy() if self._context is not None else None

    @property
    def metadata(self) -> Optional[pd.Series]:
        """Gets (a copy of) the metadata of the Observation, or None if unset."""
        return self._metadata.copy() if self._metadata is not None else None

    def to_suggestion(self) -> "Suggestion":
        """
        Converts the observation to a suggestion.

        The score is dropped; config, context and metadata are passed on as copies.

        Returns
        -------
        Suggestion
            The suggestion.
        """
        return Suggestion(
            config=self.config,
            context=self.context,
            metadata=self.metadata,
        )

    def __repr__(self) -> str:
        return (
            f"Observation(config={self._config}, score={self._score}, "
            f"context={self._context}, metadata={self._metadata})"
        )

    def __eq__(self, other: Any) -> bool:
        """Two Observations are equal when all four of their components are equal."""
        if not isinstance(other, Observation):
            return False

        if not self._config.equals(other._config):
            return False
        if not self._score.equals(other._score):
            return False
        if not compare_optional_series(self._context, other._context):
            return False
        if not compare_optional_series(self._metadata, other._metadata):
            return False

        return True

    def __ne__(self, other: Any) -> bool:
        return not self.__eq__(other)

112 

113 

class Observations:
    """A set of observations of a configuration scores."""

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        configs: pd.DataFrame = pd.DataFrame(),
        scores: pd.DataFrame = pd.DataFrame(),
        contexts: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
        observations: Optional[Iterable["Observation"]] = None,
    ):
        """
        Creates a new Observations object.

        Can accept either a set of Observations or a collection of aligned config and
        score (and optionally context and metadata) dataframes.

        If ``observations`` is non-empty, all four dataframes are built from it and
        the dataframe arguments are ignored.

        Parameters
        ----------
        configs : pandas.DataFrame
            Pandas dataframe containing configurations. Column names are the parameter names.
        scores : pandas.DataFrame
            The score metrics observed in a dataframe.
        contexts : Optional[pandas.DataFrame]
            The context in which the configuration was evaluated.
            Not Yet Implemented.
        metadata: Optional[pandas.DataFrame]
            The metadata in which the configuration was evaluated
            Not Yet Implemented.
        observations : Optional[Iterable[Observation]]
            Individual observations to build the dataframes from. Contexts (resp.
            metadata) are only kept when every observation provides them.
        """
        # Materialize once: the iterable is traversed several times below, which
        # would silently exhaust a one-shot iterator such as a generator.
        observations = [] if observations is None else list(observations)
        if observations:
            configs = pd.concat([obs.config.to_frame().T for obs in observations])
            scores = pd.concat([obs.score.to_frame().T for obs in observations])

            if all(obs.context is not None for obs in observations):
                contexts = pd.concat(
                    [obs.context.to_frame().T for obs in observations]  # type: ignore[union-attr]
                )
            else:
                contexts = None
            if all(obs.metadata is not None for obs in observations):
                metadata = pd.concat(
                    [obs.metadata.to_frame().T for obs in observations]  # type: ignore[union-attr]
                )
            else:
                metadata = None
        assert len(configs.index) == len(
            scores.index
        ), "config and score must have the same length"
        if contexts is not None:
            assert len(configs.index) == len(
                contexts.index
            ), "config and context must have the same length"
        if metadata is not None:
            assert len(configs.index) == len(
                metadata.index
            ), "config and metadata must have the same length"
        # reset_index returns new frames, so neither the caller's dataframes nor the
        # shared default arguments are stored or modified.
        self._configs = configs.reset_index(drop=True)
        self._scores = scores.reset_index(drop=True)
        self._contexts = None if contexts is None else contexts.reset_index(drop=True)
        self._metadata = None if metadata is None else metadata.reset_index(drop=True)

    @property
    def configs(self) -> pd.DataFrame:
        """Gets a copy of the configs of the Observations."""
        return self._configs.copy()

    @property
    def scores(self) -> pd.DataFrame:
        """Gets a copy of the scores of the Observations."""
        return self._scores.copy()

    @property
    def contexts(self) -> Optional[pd.DataFrame]:
        """Gets a copy of the contexts of the Observations, or None if unset."""
        return self._contexts.copy() if self._contexts is not None else None

    @property
    def metadata(self) -> Optional[pd.DataFrame]:
        """Gets a copy of the metadata of the Observations, or None if unset."""
        return self._metadata.copy() if self._metadata is not None else None

    def filter_by_index(self, index: pd.Index) -> "Observations":
        """
        Filters the observations by the given indices.

        Parameters
        ----------
        index : pandas.Index
            The indices (row labels) to keep.

        Returns
        -------
        Observations
            A new Observations object holding only the selected rows,
            renumbered from zero.
        """
        return Observations(
            configs=self._configs.loc[index].copy(),
            scores=self._scores.loc[index].copy(),
            contexts=None if self._contexts is None else self._contexts.loc[index].copy(),
            metadata=None if self._metadata is None else self._metadata.loc[index].copy(),
        )

    def append(self, observation: "Observation") -> None:
        """
        Appends the given observation to these observations (in place).

        Parameters
        ----------
        observation : Observation
            The observation to append. Unless this object is still empty, the
            observation must provide a context (resp. metadata) if and only if
            the existing observations have one.
        """
        new_context = observation.context
        new_metadata = observation.metadata
        config = observation.config.to_frame().T
        score = observation.score.to_frame().T
        context = None if new_context is None else new_context.to_frame().T
        metadata = None if new_metadata is None else new_metadata.to_frame().T
        if len(self._configs.index) == 0:
            # First row: adopt the observation as-is. The index is reset so that the
            # row label is 0 (as on every other code path) instead of whatever name
            # the incoming Series happened to carry.
            self._configs = config.reset_index(drop=True)
            self._scores = score.reset_index(drop=True)
            self._contexts = None if context is None else context.reset_index(drop=True)
            self._metadata = None if metadata is None else metadata.reset_index(drop=True)
            assert set(self.configs.index) == set(
                self.scores.index
            ), "config and score must have the same index"
            return

        self._configs = pd.concat([self._configs, config]).reset_index(drop=True)
        self._scores = pd.concat([self._scores, score]).reset_index(drop=True)
        assert set(self.configs.index) == set(
            self.scores.index
        ), "config and score must have the same index"

        if self._contexts is not None:
            assert context is not None, (
                "context of appending observation must not be null "
                "if context of prior observation is not null"
            )
            self._contexts = pd.concat([self._contexts, context]).reset_index(drop=True)
            assert self._configs.index.equals(
                self._contexts.index
            ), "config and context must have the same index"
        else:
            assert context is None, (
                "context of appending observation must be null "
                "if context of prior observation is null"
            )
        if self._metadata is not None:
            assert metadata is not None, (
                "metadata of appending observation must not be null "
                "if metadata of prior observation is not null"
            )
            self._metadata = pd.concat([self._metadata, metadata]).reset_index(drop=True)
            assert self._configs.index.equals(
                self._metadata.index
            ), "config and metadata must have the same index"
        else:
            assert metadata is None, (
                "metadata of appending observation must be null "
                "if metadata of prior observation is null"
            )

    def __len__(self) -> int:
        return len(self._configs.index)

    def __iter__(self) -> Iterator["Observation"]:
        """Yields one Observation per stored row."""
        for idx in self._configs.index:
            yield Observation(
                config=self._configs.loc[idx],
                score=self._scores.loc[idx],
                context=None if self._contexts is None else self._contexts.loc[idx],
                metadata=None if self._metadata is None else self._metadata.loc[idx],
            )

    def __repr__(self) -> str:
        # Every literal needs its own ``f`` prefix, otherwise the placeholders of
        # the second one are emitted verbatim.
        return (
            f"Observations(configs={self._configs}, scores={self._scores}, "
            f"contexts={self._contexts}, metadata={self._metadata})"
        )

    def __eq__(self, other: Any) -> bool:
        """Two Observations are equal when all four of their dataframes are equal."""
        if not isinstance(other, Observations):
            return False

        if not self._configs.equals(other._configs):
            return False
        if not self._scores.equals(other._scores):
            return False
        if not compare_optional_dataframe(self._contexts, other._contexts):
            return False
        if not compare_optional_dataframe(self._metadata, other._metadata):
            return False

        return True

    # required as per: https://stackoverflow.com/questions/30643236/does-ne-use-an-overridden-eq
    def __ne__(self, other: Any) -> bool:
        return not self.__eq__(other)

316 

317 

class Suggestion:
    """
    A single suggestion for a configuration.

    A Suggestion is an Observation that has not yet been scored. Evaluating the
    Suggestion and calling `complete(scores)` can convert it to an Observation.
    """

    def __init__(
        self,
        *,
        config: pd.Series,
        context: Optional[pd.Series] = None,
        metadata: Optional[pd.Series] = None,
    ):
        """
        Creates a new Suggestion.

        Parameters
        ----------
        config : pandas.Series
            The configuration suggested.
        context : Optional[pandas.Series]
            The context for this suggestion, by default None
        metadata : Optional[pandas.Series]
            Any metadata provided by the underlying optimizer, by default None
        """
        self._config = config
        self._context = context
        self._metadata = metadata

    @property
    def config(self) -> pd.Series:
        """Gets (a copy of) the config of the Suggestion."""
        return self._config.copy()

    @property
    def context(self) -> Optional[pd.Series]:
        """Gets (a copy of) the context of the Suggestion, or None if unset."""
        return self._context.copy() if self._context is not None else None

    @property
    def metadata(self) -> Optional[pd.Series]:
        """Gets (a copy of) the metadata of the Suggestion, or None if unset."""
        return self._metadata.copy() if self._metadata is not None else None

    def complete(self, score: pd.Series) -> Observation:
        """
        Completes the Suggestion by adding a score to turn it into an Observation.

        Parameters
        ----------
        score : pandas.Series
            The score metrics observed.

        Returns
        -------
        Observation
            The observation of the suggestion, carrying copies of this
            suggestion's config, context and metadata.
        """
        return Observation(
            config=self.config,
            score=score,
            context=self.context,
            metadata=self.metadata,
        )

    def to_configspace_config(self, space: ConfigurationSpace) -> Configuration:
        """
        Convert the suggested config into a ConfigSpace Configuration.

        Missing (NA) entries of the config are dropped before the conversion.

        Parameters
        ----------
        space : ConfigSpace.ConfigurationSpace
            The ConfigurationSpace the resulting Configuration belongs to.

        Returns
        -------
        ConfigSpace.Configuration
            The output Configuration.
        """
        return Configuration(space, values=self._config.dropna().to_dict())

    def __repr__(self) -> str:
        # Every literal needs its own ``f`` prefix, otherwise the placeholder of
        # the second one is emitted verbatim.
        return (
            f"Suggestion(config={self._config}, context={self._context}, "
            f"metadata={self._metadata})"
        )

    def __eq__(self, other: Any) -> bool:
        """Two Suggestions are equal when config, context and metadata are equal."""
        if not isinstance(other, Suggestion):
            return False

        if not self._config.equals(other._config):
            return False
        if not compare_optional_series(self._context, other._context):
            return False
        if not compare_optional_series(self._metadata, other._metadata):
            return False

        return True

    def __ne__(self, other: Any) -> bool:
        return not self.__eq__(other)