Coverage for mlos_core/mlos_core/data

2# Copyright (c) Microsoft Corporation.

3# Licensed under the MIT License.

5"""

6Data classes for ``mlos_core`` used to pass around configurations, observations, and

7suggestions.

9``mlos_core`` uses :external:py:mod:`pandas` :external:py:class:`~pandas.DataFrame`

10s and :external:py:class:`~pandas.Series` to represent configurations and scores and

11context (information about where the configuration was evaluated).

13These modules encapsulate tuples of those for easier passing around and manipulation.

14"""

15from collections.abc import Iterable, Iterator

16from typing import Any

18import pandas as pd

19from ConfigSpace import Configuration, ConfigurationSpace

21from mlos_core.util import compare_optional_dataframe, compare_optional_series

24class Observation:

25 """A single observation of a configuration."""

27 def __init__(

28 self,

29 *,

30 config: pd.Series,

31 score: pd.Series = pd.Series(),

32 context: pd.Series | None = None,

33 metadata: pd.Series | None = None,

34 ):

35 """

36 Creates a new Observation object.

38 Parameters

39 ----------

40 config : pandas.Series

41 The configuration observed.

42 score : pandas.Series

43 The score metrics observed.

44 context : pandas.Series | None

45 The context in which the configuration was evaluated.

46 Not Yet Implemented.

47 metadata: pandas.Series | None

48 The metadata in which the configuration was evaluated

49 """

50 self._config = config

51 self._score = score

52 self._context = context

53 self._metadata = metadata

55 @property

56 def config(self) -> pd.Series:

57 """Gets (a copy of) the config of the Observation."""

58 return self._config.copy()

60 @property

61 def score(self) -> pd.Series:

62 """Gets (a copy of) the score of the Observation."""

63 return self._score.copy()

65 @property

66 def context(self) -> pd.Series | None:

67 """Gets (a copy of) the context of the Observation."""

68 return self._context.copy() if self._context is not None else None

70 @property

71 def metadata(self) -> pd.Series | None:

72 """Gets (a copy of) the metadata of the Observation."""

73 return self._metadata.copy() if self._metadata is not None else None

75 def to_suggestion(self) -> "Suggestion":

76 """

77 Converts the observation to a suggestion.

79 Returns

80 -------

81 Suggestion

82 The suggestion.

83 """

84 return Suggestion(

85 config=self.config,

86 context=self.context,

87 metadata=self.metadata,

88 )

90 def __repr__(self) -> str:

91 return (

92 f"Observation(config={self._config}, score={self._score}, "

93 f"context={self._context}, metadata={self._metadata})"

94 )

96 def __eq__(self, other: Any) -> bool:

97 if not isinstance(other, Observation):

98 return False

100 if not self._config.equals(other._config):

101 return False

102 if not self._score.equals(other._score):

103 return False

104 if not compare_optional_series(self._context, other._context):

105 return False

106 if not compare_optional_series(self._metadata, other._metadata):

107 return False

108

109 return True

110

111 def __ne__(self, other: Any) -> bool:

112 return not self.__eq__(other)

113

114

class Observations:
    """
    A collection of observations stored as row-aligned dataframes.

    Row ``i`` of the configs, scores, and (when present) contexts and metadata
    dataframes together describe the ``i``-th observation.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        configs: pd.DataFrame = pd.DataFrame(),
        scores: pd.DataFrame = pd.DataFrame(),
        contexts: pd.DataFrame | None = None,
        metadata: pd.DataFrame | None = None,
        observations: Iterable[Observation] | None = None,
    ):
        """
        Creates a new Observations object.

        Can accept either a set of Observations or a collection of aligned config and
        score (and optionally context and metadata) dataframes.

        If a non-empty ``observations`` iterable is provided it takes precedence:
        the dataframes are rebuilt from it and the dataframe arguments are ignored.

        Parameters
        ----------
        configs : pandas.DataFrame
            Pandas dataframe containing configurations. Column names are the parameter names.
        scores : pandas.DataFrame
            The score metrics observed in a dataframe.
        contexts : pandas.DataFrame | None
            The context in which the configuration was evaluated.
            Not Yet Implemented.
        metadata : pandas.DataFrame | None
            The metadata in which the configuration was evaluated
            Not Yet Implemented.
        observations : Iterable[Observation] | None
            Individual observations to build the dataframes from.
            Contexts (resp. metadata) are only kept when every observation has one.

        Raises
        ------
        AssertionError
            If the scores, contexts, or metadata do not have the same number of
            rows as the configs.
        """
        # Materialize once: the observations are traversed several times below,
        # which would exhaust a generator or other single-pass iterable.
        all_obs = [] if observations is None else list(observations)
        if all_obs:
            configs = pd.concat([obs.config.to_frame().T for obs in all_obs])
            scores = pd.concat([obs.score.to_frame().T for obs in all_obs])

            context_rows = [obs.context for obs in all_obs]
            if all(row is not None for row in context_rows):
                contexts = pd.concat(
                    [row.to_frame().T for row in context_rows]  # type: ignore[union-attr]
                )
            else:
                contexts = None

            metadata_rows = [obs.metadata for obs in all_obs]
            if all(row is not None for row in metadata_rows):
                metadata = pd.concat(
                    [row.to_frame().T for row in metadata_rows]  # type: ignore[union-attr]
                )
            else:
                metadata = None

        assert len(configs.index) == len(
            scores.index
        ), "config and score must have the same length"
        if contexts is not None:
            assert len(configs.index) == len(
                contexts.index
            ), "config and context must have the same length"
        if metadata is not None:
            assert len(configs.index) == len(
                metadata.index
            ), "config and metadata must have the same length"

        # reset_index returns new frames, so the inputs (including the shared
        # default DataFrames in the signature) are never stored directly.
        self._configs = configs.reset_index(drop=True)
        self._scores = scores.reset_index(drop=True)
        self._contexts = None if contexts is None else contexts.reset_index(drop=True)
        self._metadata = None if metadata is None else metadata.reset_index(drop=True)

    @property
    def configs(self) -> pd.DataFrame:
        """Gets a copy of the configs of the Observations."""
        return self._configs.copy()

    @property
    def scores(self) -> pd.DataFrame:
        """Gets a copy of the scores of the Observations."""
        return self._scores.copy()

    @property
    def contexts(self) -> pd.DataFrame | None:
        """Gets a copy of the contexts of the Observations, or None if unset."""
        return self._contexts.copy() if self._contexts is not None else None

    @property
    def metadata(self) -> pd.DataFrame | None:
        """Gets a copy of the metadata of the Observations, or None if unset."""
        return self._metadata.copy() if self._metadata is not None else None

    def filter_by_index(self, index: pd.Index) -> "Observations":
        """
        Filters the observations by the given indices.

        Parameters
        ----------
        index : pandas.Index
            The indices to filter by.

        Returns
        -------
        Observations
            A new Observations object holding only the selected rows
            (re-indexed from zero by the constructor).
        """
        return Observations(
            configs=self._configs.loc[index].copy(),
            scores=self._scores.loc[index].copy(),
            contexts=None if self._contexts is None else self._contexts.loc[index].copy(),
            metadata=None if self._metadata is None else self._metadata.loc[index].copy(),
        )

    def append(self, observation: Observation) -> None:
        """
        Appends the given observation to these observations (in place).

        Parameters
        ----------
        observation : Observation
            The observation to append.

        Raises
        ------
        AssertionError
            If this collection is non-empty and the presence of context (or
            metadata) on the new observation does not match the collection.
        """
        config = observation.config.to_frame().T
        score = observation.score.to_frame().T
        context = None if observation.context is None else observation.context.to_frame().T
        metadata = None if observation.metadata is None else observation.metadata.to_frame().T

        if len(self._configs.index) == 0:
            # First observation: adopt its frames wholesale. Reset the index so
            # the stored frames are indexed from zero, exactly as after
            # __init__ and after every later append.
            self._configs = config.reset_index(drop=True)
            self._scores = score.reset_index(drop=True)
            self._contexts = None if context is None else context.reset_index(drop=True)
            self._metadata = None if metadata is None else metadata.reset_index(drop=True)
            return

        # Validate before mutating so a failed check cannot leave the frames
        # with differing numbers of rows.
        if self._contexts is not None:
            assert context is not None, (
                "context of appending observation must not be null "
                "if context of prior observation is not null"
            )
        else:
            assert context is None, (
                "context of appending observation must be null "
                "if context of prior observation is null"
            )
        if self._metadata is not None:
            assert metadata is not None, (
                "metadata of appending observation must not be null "
                "if metadata of prior observation is not null"
            )
        else:
            assert metadata is None, (
                "metadata of appending observation must be null "
                "if metadata of prior observation is null"
            )

        self._configs = pd.concat([self._configs, config]).reset_index(drop=True)
        self._scores = pd.concat([self._scores, score]).reset_index(drop=True)
        assert set(self.configs.index) == set(
            self.scores.index
        ), "config and score must have the same index"

        if self._contexts is not None:
            self._contexts = pd.concat([self._contexts, context]).reset_index(drop=True)
            assert self._configs.index.equals(
                self._contexts.index
            ), "config and context must have the same index"
        if self._metadata is not None:
            self._metadata = pd.concat([self._metadata, metadata]).reset_index(drop=True)
            assert self._configs.index.equals(
                self._metadata.index
            ), "config and metadata must have the same index"

    def __len__(self) -> int:
        return len(self._configs.index)

    def __iter__(self) -> Iterator["Observation"]:
        """Yields one Observation per stored row, in index order."""
        for idx in self._configs.index:
            yield Observation(
                config=self._configs.loc[idx],
                score=self._scores.loc[idx],
                context=None if self._contexts is None else self._contexts.loc[idx],
                metadata=None if self._metadata is None else self._metadata.loc[idx],
            )

    def __repr__(self) -> str:
        # Both fragments must be f-strings, otherwise the placeholders are
        # emitted literally instead of being interpolated.
        return (
            f"Observations(configs={self._configs}, scores={self._scores}, "
            f"contexts={self._contexts}, metadata={self._metadata})"
        )

    def __eq__(self, other: Any) -> bool:
        """Compares configs, scores, contexts, and metadata with another Observations."""
        if not isinstance(other, Observations):
            return False

        if not self._configs.equals(other._configs):
            return False
        if not self._scores.equals(other._scores):
            return False
        if not compare_optional_dataframe(self._contexts, other._contexts):
            return False
        if not compare_optional_dataframe(self._metadata, other._metadata):
            return False

        return True

    # required as per: https://stackoverflow.com/questions/30643236/does-ne-use-an-overridden-eq
    def __ne__(self, other: Any) -> bool:
        return not self.__eq__(other)

317

318

class Suggestion:
    """
    A single suggestion for a configuration.

    A Suggestion is an Observation that has not yet been scored. Evaluating the
    Suggestion and calling ``complete(score)`` can convert it to an Observation.
    """

    def __init__(
        self,
        *,
        config: pd.Series,
        context: pd.Series | None = None,
        metadata: pd.Series | None = None,
    ):
        """
        Creates a new Suggestion.

        Parameters
        ----------
        config : pandas.Series
            The configuration suggested.
        context : pandas.Series | None
            The context for this suggestion, by default None
        metadata : pandas.Series | None
            Any metadata provided by the underlying optimizer, by default None
        """
        self._config = config
        self._context = context
        self._metadata = metadata

    @property
    def config(self) -> pd.Series:
        """Gets (a copy of) the config of the Suggestion."""
        return self._config.copy()

    @property
    def context(self) -> pd.Series | None:
        """Gets (a copy of) the context of the Suggestion, or None if unset."""
        return self._context.copy() if self._context is not None else None

    @property
    def metadata(self) -> pd.Series | None:
        """Gets (a copy of) the metadata of the Suggestion, or None if unset."""
        return self._metadata.copy() if self._metadata is not None else None

    def complete(self, score: pd.Series) -> Observation:
        """
        Completes the Suggestion by adding a score to turn it into an Observation.

        Parameters
        ----------
        score : pandas.Series
            The score metrics observed.

        Returns
        -------
        Observation
            The observation of the suggestion.
        """
        return Observation(
            config=self.config,
            score=score,
            context=self.context,
            metadata=self.metadata,
        )

    def to_configspace_config(self, space: ConfigurationSpace) -> Configuration:
        """
        Converts this Suggestion's config to a ConfigSpace Configuration.

        Entries of the config that are missing (NaN) are dropped before the
        Configuration is built.

        Parameters
        ----------
        space : ConfigSpace.ConfigurationSpace
            The ConfigurationSpace used to construct the Configuration.

        Returns
        -------
        ConfigSpace.Configuration
            The output Configuration.
        """
        return Configuration(space, values=self._config.dropna().to_dict())

    def __repr__(self) -> str:
        # Both fragments must be f-strings, otherwise ``{self._metadata}`` is
        # emitted literally instead of being interpolated.
        return (
            f"Suggestion(config={self._config}, context={self._context}, "
            f"metadata={self._metadata})"
        )

    def __eq__(self, other: Any) -> bool:
        """Compares config, context, and metadata with another Suggestion."""
        if not isinstance(other, Suggestion):
            return False

        if not self._config.equals(other._config):
            return False
        if not compare_optional_series(self._context, other._context):
            return False
        if not compare_optional_series(self._metadata, other._metadata):
            return False

        return True

    def __ne__(self, other: Any) -> bool:
        return not self.__eq__(other)

Coverage for mlos_core/mlos_core/data_classes.py: 93%

149 statements