Coverage for mlos_viz/mlos_viz/base.py: 90%
156 statements
coverage.py v7.6.10, created at 2025-01-21 01:50 +0000
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Base functions for visualizing, explaining, and gaining insights from results."""

import re
import warnings
from collections.abc import Callable, Iterable
from importlib.metadata import version
from typing import Any, Literal

import pandas
import seaborn as sns
from matplotlib import pyplot as plt
from pandas.api.types import is_numeric_dtype
from pandas.core.groupby.generic import SeriesGroupBy

from mlos_bench.storage.base_experiment_data import ExperimentData
from mlos_viz.util import expand_results_data_args

_SEABORN_VERS = version("seaborn")


def _get_kwarg_defaults(target: Callable, **kwargs: Any) -> dict[str, Any]:
    """
    Assembles a smaller kwargs dict for the specified target function.

    Note: this only works with non-positional kwargs (e.g., those after a * arg).
    """
    target_kwargs = {}
    for kword in target.__kwdefaults__:  # or {} # intentionally omitted for now
        if kword in kwargs:
            target_kwargs[kword] = kwargs[kword]
    return target_kwargs
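
# Example (illustrative sketch, not part of the module API): _get_kwarg_defaults()
# keeps only the keyword-only args that the target accepts; `f` below is a
# hypothetical target.
#
#   >>> def f(*, x: int = 1, y: int = 2) -> int:
#   ...     return x + y
#   >>> _get_kwarg_defaults(f, x=10, z=99)  # "z" is dropped since f() doesn't accept it
#   {'x': 10}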


def ignore_plotter_warnings() -> None:
    """Suppress some annoying warnings from third-party data visualization packages by
    adding them to the warnings filter.
    """
    warnings.filterwarnings("ignore", category=FutureWarning)
    if _SEABORN_VERS <= "0.13.1":
        warnings.filterwarnings(
            "ignore",
            category=DeprecationWarning,
            module="seaborn",  # but actually comes from pandas
            message="is_categorical_dtype is deprecated and will be removed in a future version.",
        )
    # See Also: https://github.com/mwaskom/seaborn/issues/3804
    warnings.filterwarnings(
        "ignore",
        category=PendingDeprecationWarning,
        module="seaborn",  # but actually comes from matplotlib
        message=(
            "vert: bool will be deprecated in a future version. "
            "Use orientation: {'vertical', 'horizontal'} instead."
        ),
    )
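
# Example (hedged usage sketch): typically invoked once at the top of a Jupyter
# notebook, before any plotting calls, to quiet the known-noisy warnings:
#
#   >>> from mlos_viz.base import ignore_plotter_warnings
#   >>> ignore_plotter_warnings()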


def _add_groupby_desc_column(
    results_df: pandas.DataFrame,
    groupby_columns: list[str] | None = None,
) -> tuple[pandas.DataFrame, list[str], str]:
    """
    Adds a group descriptor column to the results_df.

    Parameters
    ----------
    results_df : pandas.DataFrame
        The results dataframe to add the descriptor column to.
    groupby_columns : list[str] | None
        The columns to concatenate into the descriptor, by default
        ["tunable_config_trial_group_id", "tunable_config_id"].

    Returns
    -------
    tuple[pandas.DataFrame, list[str], str]
        The modified dataframe, the groupby columns (with the new descriptor
        column appended), and the name of the new descriptor column.
    """
    # Compose a new groupby_column for display purposes that is the
    # concatenation of the min trial_id (the first one) of each config trial
    # group and the config_id.
    # Note: it needs to be a string (e.g., categorical) for boxplot and lineplot
    # to be on the same axis anyways.
    if groupby_columns is None:
        groupby_columns = ["tunable_config_trial_group_id", "tunable_config_id"]
    groupby_column = ",".join(groupby_columns)
    results_df[groupby_column] = (
        results_df[groupby_columns].astype(str).apply(",".join, axis=1)
    )  # pylint: disable=unnecessary-lambda
    groupby_columns.append(groupby_column)
    return (results_df, groupby_columns, groupby_column)
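
# Example (illustrative, with made-up ids): with the default groupby_columns the
# new descriptor column is named "tunable_config_trial_group_id,tunable_config_id"
# and holds string values like "42,7".
#
#   >>> df = pandas.DataFrame({"tunable_config_trial_group_id": [42], "tunable_config_id": [7]})
#   >>> (df, groupby_columns, groupby_column) = _add_groupby_desc_column(df)
#   >>> df[groupby_column].tolist()
#   ['42,7']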


def augment_results_df_with_config_trial_group_stats(
    exp_data: ExperimentData | None = None,
    *,
    results_df: pandas.DataFrame | None = None,
    requested_result_cols: Iterable[str] | None = None,
) -> pandas.DataFrame:
    # pylint: disable=too-complex
    """
    Add a number of useful statistical measure columns to the results dataframe.

    In particular, for each numeric result, we add the following columns for each
    requested result column:

    - ".p50": the median of each config trial group results

    - ".p75": the p75 of each config trial group results

    - ".p90": the p90 of each config trial group results

    - ".p95": the p95 of each config trial group results

    - ".p99": the p99 of each config trial group results

    - ".mean": the mean of each config trial group results

    - ".stddev": the standard deviation of each config trial group results

    - ".var": the variance of each config trial group results

    - ".var_zscore": the zscore of this group (i.e., variance relative to the stddev
      of all group variances). This can be useful for filtering out outliers (e.g.,
      configs with high variance relative to others by restricting to abs < 2 to
      remove those more than two standard deviations from the mean variance across
      all config trial groups).

    Additionally, we add a "tunable_config_trial_group_size" column that indicates
    the number of trials using a particular config.

    Parameters
    ----------
    exp_data : ExperimentData | None
        The ExperimentData (e.g., obtained from the storage layer) to plot.
    results_df : pandas.DataFrame | None
        The results dataframe to augment, by default None to use the results_df property.
    requested_result_cols : Iterable[str] | None
        Which results columns to augment, by default None to use all results columns
        that look numeric.

    Returns
    -------
    pandas.DataFrame
        The augmented results dataframe.
    """
    if results_df is None:
        if exp_data is None:
            raise ValueError("Either exp_data or results_df must be provided.")
        results_df = exp_data.results_df
    results_groups = results_df.groupby("tunable_config_id")
    if len(results_groups) <= 1:
        raise ValueError(f"Not enough data: {len(results_groups)}")

    if requested_result_cols is None:
        result_cols = {
            col
            for col in results_df.columns
            if col.startswith(ExperimentData.RESULT_COLUMN_PREFIX)
        }
    else:
        result_cols = {
            col
            for col in requested_result_cols
            if col.startswith(ExperimentData.RESULT_COLUMN_PREFIX) and col in results_df.columns
        }
        result_cols.update(
            {
                ExperimentData.RESULT_COLUMN_PREFIX + col
                for col in requested_result_cols
                if ExperimentData.RESULT_COLUMN_PREFIX + col in results_df.columns
            }
        )

    def compute_zscore_for_group_agg(
        results_groups_perf: "SeriesGroupBy",
        stats_df: pandas.DataFrame,
        result_col: str,
        agg: Literal["mean", "var", "std"],
    ) -> None:
        results_groups_perf_aggs = results_groups_perf.agg(agg)  # TODO: avoid recalculating?
        # Compute the zscore of the chosen aggregate performance of each group into
        # each row in the dataframe.
        stats_df[result_col + f".{agg}_mean"] = results_groups_perf_aggs.mean()
        stats_df[result_col + f".{agg}_stddev"] = results_groups_perf_aggs.std()
        stats_df[result_col + f".{agg}_zscore"] = (
            stats_df[result_col + f".{agg}"] - stats_df[result_col + f".{agg}_mean"]
        ) / stats_df[result_col + f".{agg}_stddev"]
        stats_df.drop(
            columns=[result_col + ".var_" + agg for agg in ("mean", "stddev")], inplace=True
        )
    augmented_results_df = results_df
    augmented_results_df["tunable_config_trial_group_size"] = results_groups["trial_id"].transform(
        "count"
    )
    for result_col in result_cols:
        if not result_col.startswith(ExperimentData.RESULT_COLUMN_PREFIX):
            continue
        if re.search(r"(start|end).*time", result_col, flags=re.IGNORECASE):
            # Ignore computing variance on things that look like timestamps.
            continue
        if not is_numeric_dtype(results_df[result_col]):
            continue
        if results_df[result_col].unique().size == 1:
            continue
        results_groups_perf = results_groups[result_col]
        stats_df = pandas.DataFrame()
        stats_df[result_col + ".mean"] = results_groups_perf.transform("mean", numeric_only=True)
        stats_df[result_col + ".var"] = results_groups_perf.transform("var")
        stats_df[result_col + ".stddev"] = stats_df[result_col + ".var"].apply(lambda x: x**0.5)

        compute_zscore_for_group_agg(results_groups_perf, stats_df, result_col, "var")
        quantiles = [0.50, 0.75, 0.90, 0.95, 0.99]
        for quantile in quantiles:  # TODO: can we do this in one pass?
            quantile_col = f"{result_col}.p{int(quantile * 100)}"
            stats_df[quantile_col] = results_groups_perf.transform("quantile", quantile)
        augmented_results_df = pandas.concat([augmented_results_df, stats_df], axis=1)
    return augmented_results_df
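
# Example (hedged usage sketch): assuming `exp_data` is an ExperimentData loaded
# from the storage layer with a numeric "result.score" column (both names are
# hypothetical), the stats columns can be used to drop high-variance configs:
#
#   >>> stats_df = augment_results_df_with_config_trial_group_stats(exp_data=exp_data)
#   >>> stable_df = stats_df[stats_df["result.score.var_zscore"].abs() < 2]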


def limit_top_n_configs(
    exp_data: ExperimentData | None = None,
    *,
    results_df: pandas.DataFrame | None = None,
    objectives: dict[str, Literal["min", "max"]] | None = None,
    top_n_configs: int = 10,
    method: Literal["mean", "median", "p50", "p75", "p90", "p95", "p99"] = "mean",
) -> tuple[pandas.DataFrame, list[int], dict[str, bool]]:
    # pylint: disable=too-many-locals
    """
    Utility function to process the results and determine the best performing configs,
    including potential repeats, to help assess variability.

    Parameters
    ----------
    exp_data : ExperimentData | None
        The ExperimentData (e.g., obtained from the storage layer) to operate on.
    results_df : pandas.DataFrame | None
        The results dataframe to augment, by default None to use the
        :py:attr:`.ExperimentData.results_df` property.
    objectives : dict[str, Literal["min", "max"]] | None
        Which result column(s) to use for sorting the configs, and in which
        direction ("min" or "max").
        By default None to automatically select the :py:attr:`.ExperimentData.objectives`.
    top_n_configs : int
        How many configs to return, including the default, by default 10.
    method : Literal["mean", "median", "p50", "p75", "p90", "p95", "p99"]
        Which statistical method to use when sorting the config groups before
        determining the cutoff, by default "mean".

    Returns
    -------
    (top_n_config_results_df, top_n_config_ids, orderby_cols) : \
        tuple[pandas.DataFrame, list[int], dict[str, bool]]
        The filtered results dataframe, the config ids, and the columns used to
        order the configs.
    """
    # Do some input checking first.
    if method not in ["mean", "median", "p50", "p75", "p90", "p95", "p99"]:
        raise ValueError(f"Invalid method: {method}")

    # Prepare the orderby columns.
    (results_df, objs_cols) = expand_results_data_args(
        exp_data,
        results_df=results_df,
        objectives=objectives,
    )
    assert isinstance(results_df, pandas.DataFrame)

    # Augment the results dataframe with some useful stats.
    results_df = augment_results_df_with_config_trial_group_stats(
        exp_data=exp_data,
        results_df=results_df,
        requested_result_cols=objs_cols.keys(),
    )
    # Note: mypy seems to lose its mind for some reason and keeps forgetting that
    # results_df is not None and is in fact a DataFrame, so we periodically assert
    # it in this func for now.
    assert results_df is not None
    orderby_cols: dict[str, bool] = {
        obj_col + f".{method}": ascending for (obj_col, ascending) in objs_cols.items()
    }

    config_id_col = "tunable_config_id"
    group_id_col = "tunable_config_trial_group_id"  # first trial_id per config group
    trial_id_col = "trial_id"

    default_config_id = (
        results_df[trial_id_col].min() if exp_data is None else exp_data.default_tunable_config_id
    )
    assert default_config_id is not None, "Failed to determine default config id."

    # Filter out configs whose variance is too large.
    # But also make sure the default config is still in the resulting dataframe
    # (for comparison purposes).
    for obj_col in objs_cols:
        assert results_df is not None
        if method == "mean":
            singletons_mask = results_df["tunable_config_trial_group_size"] == 1
        else:
            singletons_mask = results_df["tunable_config_trial_group_size"] > 1
        results_df = results_df.loc[
            (
                (results_df[f"{obj_col}.var_zscore"].abs() < 2)
                | (singletons_mask)
                | (results_df[config_id_col] == default_config_id)
            )
        ]
    assert results_df is not None

    # Also, filter out results that are worse than the default.
    default_config_results_df = results_df.loc[results_df[config_id_col] == default_config_id]
    for orderby_col, ascending in orderby_cols.items():
        default_vals = default_config_results_df[orderby_col].unique()
        assert len(default_vals) == 1
        default_val = default_vals[0]
        assert results_df is not None
        if ascending:
            results_df = results_df.loc[(results_df[orderby_col] <= default_val)]
        else:
            results_df = results_df.loc[(results_df[orderby_col] >= default_val)]

    # Now regroup and filter to the top-N configs by their group performance dimensions.
    assert results_df is not None
    group_results_df: pandas.DataFrame = results_df.groupby(config_id_col).first()[
        orderby_cols.keys()
    ]
    top_n_config_ids: list[int] = (
        group_results_df.sort_values(
            by=list(orderby_cols.keys()), ascending=list(orderby_cols.values())
        )
        .head(top_n_configs)
        .index.tolist()
    )

    # Remove the default config if it's included. We'll add it back later.
    if default_config_id in top_n_config_ids:
        top_n_config_ids.remove(default_config_id)
    # Get just the top-n config results, sorted by the group ids.
    top_n_config_results_df = results_df.loc[
        (results_df[config_id_col].isin(top_n_config_ids))
    ].sort_values([group_id_col, config_id_col, trial_id_col])
    # Place the default config at the top of the list.
    top_n_config_ids.insert(0, default_config_id)
    top_n_config_results_df = pandas.concat(
        [default_config_results_df, top_n_config_results_df],
        axis=0,
    )
    return (top_n_config_results_df, top_n_config_ids, orderby_cols)
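
# Example (hedged usage sketch): `exp_data` and the "score" objective are
# hypothetical; repeats of the surviving configs are kept to help assess
# variability.
#
#   >>> (top_df, top_ids, orderby_cols) = limit_top_n_configs(
#   ...     exp_data=exp_data,
#   ...     objectives={"score": "min"},
#   ...     top_n_configs=5,
#   ...     method="p90",
#   ... )
#   >>> top_ids[0] == exp_data.default_tunable_config_id  # default is always first
#   True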


def plot_optimizer_trends(
    exp_data: ExperimentData | None = None,
    *,
    results_df: pandas.DataFrame | None = None,
    objectives: dict[str, Literal["min", "max"]] | None = None,
) -> None:
    """
    Plots the optimizer trends for the Experiment.

    Parameters
    ----------
    exp_data : ExperimentData | None
        The ExperimentData (e.g., obtained from the storage layer) to plot.
    results_df : pandas.DataFrame | None
        Optional results_df to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.results_df` property.
    objectives : dict[str, Literal["min", "max"]] | None
        Optional objectives to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.objectives` property.
    """
    (results_df, obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
    (results_df, groupby_columns, groupby_column) = _add_groupby_desc_column(results_df)

    for objective_column, ascending in obj_cols.items():
        incumbent_column = objective_column + ".incumbent"

        # Determine the mean of each config trial group to match the box plots.
        group_results_df = (
            results_df.groupby(groupby_columns)[objective_column]
            .mean()
            .reset_index()
            .sort_values(groupby_columns)
        )
        #
        # Note: technically the optimizer (usually) uses the *first* result for a
        # given config trial group before moving on to a new config (x-axis), so
        # plotting the mean may be slightly misleading when trying to understand the
        # actual path taken by the optimizer in the case of high variance samples.
        # Here's a way to do that, though it can also be misleading if the optimizer
        # later gets a worse value for that config group as well.
        #
        # group_results_df = results_df.sort_values(groupby_columns + ["trial_id"]).groupby(
        #     groupby_columns).head(1)[groupby_columns + [objective_column]].reset_index()

        # Calculate the incumbent (best seen so far).
        if ascending:
            group_results_df[incumbent_column] = group_results_df[objective_column].cummin()
        else:
            group_results_df[incumbent_column] = group_results_df[objective_column].cummax()

        (_fig, axis) = plt.subplots(figsize=(15, 5))

        # Result of each set of trials for a config.
        sns.boxplot(
            data=results_df,
            x=groupby_column,
            y=objective_column,
            ax=axis,
        )

        # Results of the best so far.
        axis = sns.lineplot(
            data=group_results_df,
            x=groupby_column,
            y=incumbent_column,
            alpha=0.7,
            label="Mean of Incumbent Config Trial Group",
            ax=axis,
        )

        plt.yscale("log")
        plt.ylabel(objective_column.replace(ExperimentData.RESULT_COLUMN_PREFIX, ""))

        plt.xlabel("Config Trial Group ID, Config ID")
        plt.xticks(rotation=90, fontsize=8)

        plt.title(
            "Optimizer Trends for Experiment: " + exp_data.experiment_id
            if exp_data is not None
            else ""
        )
        plt.grid()
        plt.show()
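
# Example (hedged usage sketch, for a Jupyter notebook): assuming `exp_data` is an
# ExperimentData previously obtained from the mlos_bench storage layer:
#
#   >>> ignore_plotter_warnings()
#   >>> plot_optimizer_trends(exp_data)  # one boxplot + incumbent lineplot per objective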


def plot_top_n_configs(
    exp_data: ExperimentData | None = None,
    *,
    results_df: pandas.DataFrame | None = None,
    objectives: dict[str, Literal["min", "max"]] | None = None,
    with_scatter_plot: bool = False,
    **kwargs: Any,
) -> None:
    # pylint: disable=too-many-locals
    """
    Plots the top-N configs along with the default config for the given
    :py:class:`.ExperimentData`.

    Intended to be used from a Jupyter notebook.

    Parameters
    ----------
    exp_data : ExperimentData | None
        The experiment data to plot.
    results_df : pandas.DataFrame | None
        Optional results_df to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.results_df` property.
    objectives : dict[str, Literal["min", "max"]] | None
        Optional objectives to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.objectives` property.
    with_scatter_plot : bool
        Whether to also add a scatter plot to the output figure.
    kwargs : dict
        Remaining keyword arguments are passed along to the
        :py:func:`limit_top_n_configs` function.
    """
    (results_df, _obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
    top_n_config_args = _get_kwarg_defaults(limit_top_n_configs, **kwargs)
    if "results_df" not in top_n_config_args:
        top_n_config_args["results_df"] = results_df
    if "objectives" not in top_n_config_args:
        top_n_config_args["objectives"] = objectives
    (top_n_config_results_df, _top_n_config_ids, orderby_cols) = limit_top_n_configs(
        exp_data=exp_data,
        **top_n_config_args,
    )

    (top_n_config_results_df, _groupby_columns, groupby_column) = _add_groupby_desc_column(
        top_n_config_results_df,
    )
    top_n = len(top_n_config_results_df[groupby_column].unique()) - 1

    for orderby_col, ascending in orderby_cols.items():
        opt_tgt = orderby_col.replace(ExperimentData.RESULT_COLUMN_PREFIX, "")
        (_fig, axis) = plt.subplots()
        sns.violinplot(
            data=top_n_config_results_df,
            x=groupby_column,
            y=orderby_col,
            ax=axis,
        )
        if with_scatter_plot:
            sns.scatterplot(
                data=top_n_config_results_df,
                x=groupby_column,
                y=orderby_col,
                legend=False,
                ax=axis,
            )
        plt.grid()
        (xticks, xlabels) = plt.xticks()
        # The default config should be in the first position per the limit_top_n_configs() return.
        xlabels[0] = "default"  # type: ignore[call-overload]
        plt.xticks(xticks, xlabels)  # type: ignore[arg-type]
        plt.xlabel("Config Trial Group, Config ID")
        plt.xticks(rotation=90)
        plt.ylabel(opt_tgt)
        plt.yscale("log")
        extra_title = "(lower is better)" if ascending else "(higher is better)"
        plt.title(f"Top {top_n} configs {opt_tgt} {extra_title}")
        plt.show()
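
# Example (hedged usage sketch): remaining kwargs are forwarded to
# limit_top_n_configs(); `exp_data` is assumed to come from the storage layer.
#
#   >>> plot_top_n_configs(
#   ...     exp_data,
#   ...     with_scatter_plot=True,  # overlay individual trial results
#   ...     top_n_configs=5,         # forwarded to limit_top_n_configs()
#   ...     method="p90",            # rank config trial groups by their p90
#   ... )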