Coverage for mlos_viz/mlos_viz/base.py: 90% (156 statements)


#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Base functions for visualizing, explaining, and gaining insights from results."""

import re
import warnings
from collections.abc import Callable, Iterable
from importlib.metadata import version
from typing import Any, Literal

import pandas
import seaborn as sns
from matplotlib import pyplot as plt
from pandas.api.types import is_numeric_dtype
from pandas.core.groupby.generic import SeriesGroupBy

from mlos_bench.storage.base_experiment_data import ExperimentData
from mlos_viz.util import expand_results_data_args

_SEABORN_VERS = version("seaborn")


def _get_kwarg_defaults(target: Callable, **kwargs: Any) -> dict[str, Any]:
    """
    Assembles a smaller kwargs dict for the specified target function.

    Note: this only works with non-positional kwargs (e.g., those after a * arg).
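
    Example (an illustrative sketch; ``example_func`` is a hypothetical function,
    not part of this module):

    >>> def example_func(*, alpha=1, beta=2):
    ...     pass
    >>> _get_kwarg_defaults(example_func, alpha=10, gamma=3)
    {'alpha': 10}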

    """
    target_kwargs = {}
    for kword in target.__kwdefaults__:  # or {} # intentionally omitted for now
        if kword in kwargs:
            target_kwargs[kword] = kwargs[kword]
    return target_kwargs


def ignore_plotter_warnings() -> None:
    """Suppress some annoying warnings from third-party data visualization packages by
    adding them to the warnings filter.
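
    Example (a minimal usage sketch; typically called once, e.g., at the top of a
    notebook, before any plotting):

    >>> ignore_plotter_warnings()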

    """
    warnings.filterwarnings("ignore", category=FutureWarning)
    if _SEABORN_VERS <= "0.13.1":
        warnings.filterwarnings(
            "ignore",
            category=DeprecationWarning,
            module="seaborn",  # but actually comes from pandas
            message="is_categorical_dtype is deprecated and will be removed in a future version.",
        )
    # See Also: https://github.com/mwaskom/seaborn/issues/3804
    warnings.filterwarnings(
        "ignore",
        category=PendingDeprecationWarning,
        module="seaborn",  # but actually comes from matplotlib
        message=(
            "vert: bool will be deprecated in a future version. "
            "Use orientation: {'vertical', 'horizontal'} instead."
        ),
    )


def _add_groupby_desc_column(
    results_df: pandas.DataFrame,
    groupby_columns: list[str] | None = None,
) -> tuple[pandas.DataFrame, list[str], str]:
    """
    Adds a group descriptor column to the results_df.

    Parameters
    ----------
    results_df : pandas.DataFrame
        The results dataframe to add the descriptor column to.
    groupby_columns : list[str] | None
        The columns to concatenate into the descriptor, by default
        ["tunable_config_trial_group_id", "tunable_config_id"].
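
    Example (an illustrative sketch with a small hypothetical results_df):

    >>> df = pandas.DataFrame(
    ...     {"tunable_config_trial_group_id": [1, 1], "tunable_config_id": [7, 7]}
    ... )
    >>> (df, groupby_columns, groupby_column) = _add_groupby_desc_column(df)
    >>> df[groupby_column].tolist()
    ['1,7', '1,7']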

    """
    # Compose a new groupby_column for display purposes that is the
    # concatenation of the min trial_id (the first one) of each config trial
    # group and the config_id.
    # Note: it needs to be a string (e.g., categorical) for boxplot and lineplot
    # to be on the same axis anyway.
    if groupby_columns is None:
        groupby_columns = ["tunable_config_trial_group_id", "tunable_config_id"]
    groupby_column = ",".join(groupby_columns)
    results_df[groupby_column] = (
        results_df[groupby_columns].astype(str).apply(",".join, axis=1)
    )
    groupby_columns.append(groupby_column)
    return (results_df, groupby_columns, groupby_column)


def augment_results_df_with_config_trial_group_stats(
    exp_data: ExperimentData | None = None,
    *,
    results_df: pandas.DataFrame | None = None,
    requested_result_cols: Iterable[str] | None = None,
) -> pandas.DataFrame:
    # pylint: disable=too-complex
    """
    Add a number of useful statistical measure columns to the results dataframe.

    In particular, for each requested numeric result column, we add the following
    columns:

    - ".p50": the median of each config trial group's results

    - ".p75": the p75 of each config trial group's results

    - ".p90": the p90 of each config trial group's results

    - ".p95": the p95 of each config trial group's results

    - ".p99": the p99 of each config trial group's results

    - ".mean": the mean of each config trial group's results

    - ".stddev": the standard deviation of each config trial group's results

    - ".var": the variance of each config trial group's results

    - ".var_zscore": the zscore of this group's variance (i.e., its variance
      relative to the stddev of all group variances). This can be useful for
      filtering out outliers (e.g., configs with high variance relative to
      others) by restricting to abs < 2, which removes groups more than two
      standard deviations from the mean variance across all config trial groups.

    Additionally, we add a "tunable_config_trial_group_size" column that indicates
    the number of trials using a particular config.

    Parameters
    ----------
    exp_data : ExperimentData | None
        The ExperimentData (e.g., obtained from the storage layer) to plot.
    results_df : pandas.DataFrame | None
        The results dataframe to augment, by default None to use the results_df property.
    requested_result_cols : Iterable[str] | None
        Which results columns to augment, by default None to use all results columns
        that look numeric.

    Returns
    -------
    pandas.DataFrame
        The augmented results dataframe.
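
    Example (an illustrative sketch with a small hypothetical results_df; assumes
    :py:attr:`.ExperimentData.RESULT_COLUMN_PREFIX` is ``"result."``):

    >>> df = pandas.DataFrame(
    ...     {
    ...         "tunable_config_id": [1, 1, 2, 2],
    ...         "trial_id": [1, 2, 3, 4],
    ...         "result.score": [1.0, 2.0, 3.0, 4.0],
    ...     }
    ... )
    >>> augmented = augment_results_df_with_config_trial_group_stats(results_df=df)
    >>> augmented["result.score.mean"].tolist()
    [1.5, 1.5, 3.5, 3.5]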

    """
    if results_df is None:
        if exp_data is None:
            raise ValueError("Either exp_data or results_df must be provided.")
        results_df = exp_data.results_df
    results_groups = results_df.groupby("tunable_config_id")
    if len(results_groups) <= 1:
        raise ValueError(f"Not enough data: {len(results_groups)}")

    if requested_result_cols is None:
        result_cols = {
            col
            for col in results_df.columns
            if col.startswith(ExperimentData.RESULT_COLUMN_PREFIX)
        }
    else:
        result_cols = {
            col
            for col in requested_result_cols
            if col.startswith(ExperimentData.RESULT_COLUMN_PREFIX) and col in results_df.columns
        }
        result_cols.update(
            {
                ExperimentData.RESULT_COLUMN_PREFIX + col
                for col in requested_result_cols
                if ExperimentData.RESULT_COLUMN_PREFIX + col in results_df.columns
            }
        )

    def compute_zscore_for_group_agg(
        results_groups_perf: "SeriesGroupBy",
        stats_df: pandas.DataFrame,
        result_col: str,
        agg: Literal["mean"] | Literal["var"] | Literal["std"],
    ) -> None:
        results_groups_perf_aggs = results_groups_perf.agg(agg)  # TODO: avoid recalculating?
        # Compute the zscore of the chosen aggregate performance of each group into
        # each row in the dataframe.
        stats_df[result_col + f".{agg}_mean"] = results_groups_perf_aggs.mean()
        stats_df[result_col + f".{agg}_stddev"] = results_groups_perf_aggs.std()
        stats_df[result_col + f".{agg}_zscore"] = (
            stats_df[result_col + f".{agg}"] - stats_df[result_col + f".{agg}_mean"]
        ) / stats_df[result_col + f".{agg}_stddev"]
        stats_df.drop(
            columns=[result_col + f".{agg}_{stat}" for stat in ("mean", "stddev")],
            inplace=True,
        )

    augmented_results_df = results_df
    augmented_results_df["tunable_config_trial_group_size"] = results_groups["trial_id"].transform(
        "count"
    )
    for result_col in result_cols:
        if not result_col.startswith(ExperimentData.RESULT_COLUMN_PREFIX):
            continue
        if re.search(r"(start|end).*time", result_col, flags=re.IGNORECASE):
            # Ignore computing variance on things that look like timestamps.
            continue
        if not is_numeric_dtype(results_df[result_col]):
            continue
        if results_df[result_col].unique().size == 1:
            continue
        results_groups_perf = results_groups[result_col]
        stats_df = pandas.DataFrame()
        stats_df[result_col + ".mean"] = results_groups_perf.transform("mean", numeric_only=True)
        stats_df[result_col + ".var"] = results_groups_perf.transform("var")
        stats_df[result_col + ".stddev"] = stats_df[result_col + ".var"].apply(lambda x: x**0.5)

        compute_zscore_for_group_agg(results_groups_perf, stats_df, result_col, "var")
        quantiles = [0.50, 0.75, 0.90, 0.95, 0.99]
        for quantile in quantiles:  # TODO: can we do this in one pass?
            quantile_col = f"{result_col}.p{int(quantile * 100)}"
            stats_df[quantile_col] = results_groups_perf.transform("quantile", quantile)
        augmented_results_df = pandas.concat([augmented_results_df, stats_df], axis=1)
    return augmented_results_df


def limit_top_n_configs(
    exp_data: ExperimentData | None = None,
    *,
    results_df: pandas.DataFrame | None = None,
    objectives: dict[str, Literal["min", "max"]] | None = None,
    top_n_configs: int = 10,
    method: Literal["mean", "p50", "p75", "p90", "p95", "p99"] = "mean",
) -> tuple[pandas.DataFrame, list[int], dict[str, bool]]:
    # pylint: disable=too-many-locals
    """
    Utility function to process the results and determine the best performing
    configs, including potential repeats, to help assess variability.

    Parameters
    ----------
    exp_data : ExperimentData | None
        The ExperimentData (e.g., obtained from the storage layer) to operate on.
    results_df : pandas.DataFrame | None
        The results dataframe to augment, by default None to use
        :py:attr:`.ExperimentData.results_df` property.
    objectives : dict[str, Literal["min", "max"]] | None
        Which result column(s) to use for sorting the configs, and in which
        direction ("min" or "max").
        By default None to automatically select the :py:attr:`.ExperimentData.objectives`.
    top_n_configs : int
        How many configs to return, including the default, by default 10.
    method : Literal["mean", "p50", "p75", "p90", "p95", "p99"]
        Which statistical method to use when sorting the config groups before
        determining the cutoff, by default "mean".

    Returns
    -------
    (top_n_config_results_df, top_n_config_ids, orderby_cols) :
        tuple[pandas.DataFrame, list[int], dict[str, bool]]
        The filtered results dataframe, the config ids, and the columns used to
        order the configs.
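
    Example (an illustrative usage sketch; assumes ``exp_data`` was previously
    loaded from the storage layer):

    >>> (top_n_df, top_n_config_ids, orderby_cols) = limit_top_n_configs(  # doctest: +SKIP
    ...     exp_data,
    ...     top_n_configs=5,
    ...     method="p90",
    ... )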

    """
    # Do some input checking first.
    if method not in ["mean", "p50", "p75", "p90", "p95", "p99"]:
        raise ValueError(f"Invalid method: {method}")

    # Prepare the orderby columns.
    (results_df, objs_cols) = expand_results_data_args(
        exp_data,
        results_df=results_df,
        objectives=objectives,
    )
    assert isinstance(results_df, pandas.DataFrame)

    # Augment the results dataframe with some useful stats.
    results_df = augment_results_df_with_config_trial_group_stats(
        exp_data=exp_data,
        results_df=results_df,
        requested_result_cols=objs_cols.keys(),
    )
    # Note: mypy seems to lose its mind for some reason and keeps forgetting that
    # results_df is not None and is in fact a DataFrame, so we periodically assert
    # it in this func for now.
    assert results_df is not None
    orderby_cols: dict[str, bool] = {
        obj_col + f".{method}": ascending for (obj_col, ascending) in objs_cols.items()
    }

    config_id_col = "tunable_config_id"
    group_id_col = "tunable_config_trial_group_id"  # first trial_id per config group
    trial_id_col = "trial_id"

    default_config_id = (
        results_df[trial_id_col].min() if exp_data is None else exp_data.default_tunable_config_id
    )
    assert default_config_id is not None, "Failed to determine default config id."

    # Filter out configs whose variance is too large.
    # But also make sure the default config is still in the resulting dataframe
    # (for comparison purposes).
    for obj_col in objs_cols:
        assert results_df is not None
        if method == "mean":
            singletons_mask = results_df["tunable_config_trial_group_size"] == 1
        else:
            singletons_mask = results_df["tunable_config_trial_group_size"] > 1
        results_df = results_df.loc[
            (
                (results_df[f"{obj_col}.var_zscore"].abs() < 2)
                | (singletons_mask)
                | (results_df[config_id_col] == default_config_id)
            )
        ]
    assert results_df is not None

    # Also, filter out results that are worse than the default.
    default_config_results_df = results_df.loc[results_df[config_id_col] == default_config_id]
    for orderby_col, ascending in orderby_cols.items():
        default_vals = default_config_results_df[orderby_col].unique()
        assert len(default_vals) == 1
        default_val = default_vals[0]
        assert results_df is not None
        if ascending:
            results_df = results_df.loc[(results_df[orderby_col] <= default_val)]
        else:
            results_df = results_df.loc[(results_df[orderby_col] >= default_val)]

    # Now regroup and filter to the top-N configs by their group performance dimensions.
    assert results_df is not None
    group_results_df: pandas.DataFrame = results_df.groupby(config_id_col).first()[
        orderby_cols.keys()
    ]
    top_n_config_ids: list[int] = (
        group_results_df.sort_values(
            by=list(orderby_cols.keys()), ascending=list(orderby_cols.values())
        )
        .head(top_n_configs)
        .index.tolist()
    )

    # Remove the default config if it's included. We'll add it back later.
    if default_config_id in top_n_config_ids:
        top_n_config_ids.remove(default_config_id)
    # Get just the top-n config results.
    # Sort by the group ids.
    top_n_config_results_df = results_df.loc[
        (results_df[config_id_col].isin(top_n_config_ids))
    ].sort_values([group_id_col, config_id_col, trial_id_col])
    # Place the default config at the top of the list.
    top_n_config_ids.insert(0, default_config_id)
    top_n_config_results_df = pandas.concat(
        [default_config_results_df, top_n_config_results_df],
        axis=0,
    )
    return (top_n_config_results_df, top_n_config_ids, orderby_cols)


def plot_optimizer_trends(
    exp_data: ExperimentData | None = None,
    *,
    results_df: pandas.DataFrame | None = None,
    objectives: dict[str, Literal["min", "max"]] | None = None,
) -> None:
    """
    Plots the optimizer trends for the Experiment.

    Parameters
    ----------
    exp_data : ExperimentData
        The ExperimentData (e.g., obtained from the storage layer) to plot.
    results_df : pandas.DataFrame | None
        Optional results_df to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.results_df` property.
    objectives : dict[str, Literal["min", "max"]] | None
        Optional objectives to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.objectives` property.
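
    Example (an illustrative usage sketch in a notebook; assumes ``exp_data`` was
    previously loaded from the storage layer):

    >>> plot_optimizer_trends(exp_data)  # doctest: +SKIP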

    """
    (results_df, obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
    (results_df, groupby_columns, groupby_column) = _add_groupby_desc_column(results_df)

    for objective_column, ascending in obj_cols.items():
        incumbent_column = objective_column + ".incumbent"

        # Determine the mean of each config trial group to match the box plots.
        group_results_df = (
            results_df.groupby(groupby_columns)[objective_column]
            .mean()
            .reset_index()
            .sort_values(groupby_columns)
        )
        #
        # Note: technically the optimizer (usually) uses the *first* result for a
        # given config trial group before moving on to a new config (x-axis), so
        # plotting the mean may be slightly misleading when trying to understand the
        # actual path taken by the optimizer in case of high variance samples.
        # Here's a way to do that, though it can also be misleading if the optimizer
        # later gets a worse value for that config group as well.
        #
        # group_results_df = results_df.sort_values(groupby_columns + ["trial_id"]).groupby(
        #     groupby_columns).head(1)[groupby_columns + [objective_column]].reset_index()

        # Calculate the incumbent (best seen so far).
        if ascending:
            group_results_df[incumbent_column] = group_results_df[objective_column].cummin()
        else:
            group_results_df[incumbent_column] = group_results_df[objective_column].cummax()

        (_fig, axis) = plt.subplots(figsize=(15, 5))

        # Result of each set of trials for a config.
        sns.boxplot(
            data=results_df,
            x=groupby_column,
            y=objective_column,
            ax=axis,
        )

        # Results of the best seen so far.
        axis = sns.lineplot(
            data=group_results_df,
            x=groupby_column,
            y=incumbent_column,
            alpha=0.7,
            label="Mean of Incumbent Config Trial Group",
            ax=axis,
        )

        plt.yscale("log")
        plt.ylabel(objective_column.replace(ExperimentData.RESULT_COLUMN_PREFIX, ""))

        plt.xlabel("Config Trial Group ID, Config ID")
        plt.xticks(rotation=90, fontsize=8)

        plt.title(
            ("Optimizer Trends for Experiment: " + exp_data.experiment_id)
            if exp_data is not None
            else ""
        )
        plt.grid()
        plt.show()


def plot_top_n_configs(
    exp_data: ExperimentData | None = None,
    *,
    results_df: pandas.DataFrame | None = None,
    objectives: dict[str, Literal["min", "max"]] | None = None,
    with_scatter_plot: bool = False,
    **kwargs: Any,
) -> None:
    # pylint: disable=too-many-locals
    """
    Plots the top-N configs along with the default config for the given
    :py:class:`.ExperimentData`.

    Intended to be used from a Jupyter notebook.

    Parameters
    ----------
    exp_data : ExperimentData
        The experiment data to plot.
    results_df : pandas.DataFrame | None
        Optional results_df to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.results_df` property.
    objectives : dict[str, Literal["min", "max"]] | None
        Optional objectives to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.objectives` property.
    with_scatter_plot : bool
        Whether to also add a scatter plot to the output figure.
    kwargs : dict
        Remaining keyword arguments are passed along to the
        :py:func:`limit_top_n_configs` function.
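
    Example (an illustrative usage sketch in a notebook; assumes ``exp_data`` was
    previously loaded from the storage layer, with ``top_n_configs`` forwarded to
    :py:func:`limit_top_n_configs`):

    >>> plot_top_n_configs(  # doctest: +SKIP
    ...     exp_data,
    ...     with_scatter_plot=True,
    ...     top_n_configs=5,
    ... )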

    """
    (results_df, _obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
    top_n_config_args = _get_kwarg_defaults(limit_top_n_configs, **kwargs)
    if "results_df" not in top_n_config_args:
        top_n_config_args["results_df"] = results_df
    if "objectives" not in top_n_config_args:
        top_n_config_args["objectives"] = objectives
    (top_n_config_results_df, _top_n_config_ids, orderby_cols) = limit_top_n_configs(
        exp_data=exp_data,
        **top_n_config_args,
    )

    (top_n_config_results_df, _groupby_columns, groupby_column) = _add_groupby_desc_column(
        top_n_config_results_df,
    )
    top_n = len(top_n_config_results_df[groupby_column].unique()) - 1

    for orderby_col, ascending in orderby_cols.items():
        opt_tgt = orderby_col.replace(ExperimentData.RESULT_COLUMN_PREFIX, "")
        (_fig, axis) = plt.subplots()
        sns.violinplot(
            data=top_n_config_results_df,
            x=groupby_column,
            y=orderby_col,
            ax=axis,
        )
        if with_scatter_plot:
            sns.scatterplot(
                data=top_n_config_results_df,
                x=groupby_column,
                y=orderby_col,
                legend=False,
                ax=axis,
            )
        plt.grid()
        (xticks, xlabels) = plt.xticks()
        # The default config should be in the first position based on the
        # limit_top_n_configs() return ordering.
        xlabels[0] = "default"  # type: ignore[call-overload]
        plt.xticks(xticks, xlabels)  # type: ignore[arg-type]
        plt.xlabel("Config Trial Group, Config ID")
        plt.xticks(rotation=90)
        plt.ylabel(opt_tgt)
        plt.yscale("log")
        extra_title = "(lower is better)" if ascending else "(higher is better)"
        plt.title(f"Top {top_n} configs {opt_tgt} {extra_title}")
        plt.show()