Coverage for mlos_viz/mlos_viz/base.py: 90%
154 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-10-07 01:52 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-10-07 01:52 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""Base functions for visualizing, explaining, and gaining insights from results."""
7import re
8import warnings
9from importlib.metadata import version
10from typing import Any, Callable, Dict, Iterable, List, Literal, Optional, Tuple, Union
12import pandas
13import seaborn as sns
14from matplotlib import pyplot as plt
15from pandas.api.types import is_numeric_dtype
16from pandas.core.groupby.generic import SeriesGroupBy
18from mlos_bench.storage.base_experiment_data import ExperimentData
19from mlos_viz.util import expand_results_data_args
21_SEABORN_VERS = version("seaborn")
24def _get_kwarg_defaults(target: Callable, **kwargs: Any) -> Dict[str, Any]:
25 """
26 Assembles a smaller kwargs dict for the specified target function.
28 Note: this only works with non-positional kwargs (e.g., those after a * arg).
29 """
30 target_kwargs = {}
31 for kword in target.__kwdefaults__: # or {} # intentionally omitted for now
32 if kword in kwargs:
33 target_kwargs[kword] = kwargs[kword]
34 return target_kwargs
def ignore_plotter_warnings() -> None:
    """
    Suppress some annoying warnings from third-party data visualization packages by
    adding them to the warnings filter.

    All ``FutureWarning`` categories are ignored. Additionally, for seaborn
    releases up to and including 0.13.1, the ``is_categorical_dtype``
    ``DeprecationWarning`` attributed to the seaborn module is ignored.
    """
    warnings.filterwarnings("ignore", category=FutureWarning)

    # Compare the release numerically: a plain string comparison is
    # lexicographic, so e.g. "0.9.0" would sort *after* "0.13.1".
    release_match = re.match(r"\d+(?:\.\d+)*", _SEABORN_VERS)
    seaborn_release = (
        tuple(int(part) for part in release_match.group(0).split("."))
        if release_match
        else ()  # unparsable version: err on the side of installing the filter
    )
    if seaborn_release <= (0, 13, 1):
        warnings.filterwarnings(
            "ignore",
            category=DeprecationWarning,
            module="seaborn",  # but actually comes from pandas
            message="is_categorical_dtype is deprecated and will be removed in a future version.",
        )
51def _add_groupby_desc_column(
52 results_df: pandas.DataFrame,
53 groupby_columns: Optional[List[str]] = None,
54) -> Tuple[pandas.DataFrame, List[str], str]:
55 """
56 Adds a group descriptor column to the results_df.
58 Parameters
59 ----------
60 results_df: ExperimentData
61 The experiment data to add the descriptor column to.
62 groupby_columns: Optional[List[str]]
63 """
64 # Compose a new groupby_column for display purposes that is the
65 # concatenation of the min trial_id (the first one) of each config trial
66 # group and the config_id.
67 # Note: It's need to be a string (e.g., categorical) for boxplot and lineplot to
68 # be on the same axis anyways.
69 if groupby_columns is None:
70 groupby_columns = ["tunable_config_trial_group_id", "tunable_config_id"]
71 groupby_column = ",".join(groupby_columns)
72 results_df[groupby_column] = (
73 results_df[groupby_columns].astype(str).apply(lambda x: ",".join(x), axis=1)
74 ) # pylint: disable=unnecessary-lambda
75 groupby_columns.append(groupby_column)
76 return (results_df, groupby_columns, groupby_column)
def augment_results_df_with_config_trial_group_stats(
    exp_data: Optional[ExperimentData] = None,
    *,
    results_df: Optional[pandas.DataFrame] = None,
    requested_result_cols: Optional[Iterable[str]] = None,
) -> pandas.DataFrame:
    # pylint: disable=too-complex
    """
    Add a number of useful statistical measure columns to the results dataframe.

    In particular, for each numeric result, we add the following columns for each
    requested result column:

    - ".p50": the median of each config trial group results

    - ".p75": the p75 of each config trial group results

    - ".p90": the p90 of each config trial group results

    - ".p95": the p95 of each config trial group results

    - ".p99": the p99 of each config trial group results

    - ".mean": the mean of each config trial group results

    - ".stddev": the standard deviation of each config trial group results

    - ".var": the variance of each config trial group results

    - ".var_zscore": the zscore of this group (i.e., variance relative to the stddev
      of all group variances). This can be useful for filtering out outliers (e.g.,
      configs with high variance relative to others by restricting to abs < 2 to
      remove those two standard deviations from the mean variance across all config
      trial groups).

    Additionally, we add a "tunable_config_trial_group_size" column that indicates
    the number of trials using a particular config.

    Result columns that look like timestamps (matching ``(start|end).*time``),
    are not numeric, or hold a single unique value are skipped.

    Note: the "tunable_config_trial_group_size" column is also written into the
    dataframe that was passed in (or ``exp_data.results_df``).

    Parameters
    ----------
    exp_data : ExperimentData
        The ExperimentData (e.g., obtained from the storage layer) to plot.
    results_df : Optional[pandas.DataFrame]
        The results dataframe to augment, by default None to use the results_df property.
    requested_result_cols : Optional[Iterable[str]]
        Which results columns to augment, by default None to use all results columns
        that look numeric. Names may be given with or without the result column prefix.

    Returns
    -------
    pandas.DataFrame
        The augmented results dataframe.

    Raises
    ------
    ValueError
        If neither exp_data nor results_df is provided, or if the results
        contain at most one tunable config group.
    """
    if results_df is None:
        if exp_data is None:
            raise ValueError("Either exp_data or results_df must be provided.")
        results_df = exp_data.results_df
    results_groups = results_df.groupby("tunable_config_id")
    if len(results_groups) <= 1:
        raise ValueError(f"Not enough data: {len(results_groups)}")

    prefix = ExperimentData.RESULT_COLUMN_PREFIX
    if requested_result_cols is None:
        result_cols = {col for col in results_df.columns if col.startswith(prefix)}
    else:
        # Materialize once: the names are scanned twice below and a one-shot
        # iterator would be exhausted after the first pass.
        requested = list(requested_result_cols)
        # Names that already carry the result prefix ...
        result_cols = {
            col for col in requested if col.startswith(prefix) and col in results_df.columns
        }
        # ... and bare names that only match a column once the prefix is added.
        result_cols.update(
            prefix + col for col in requested if prefix + col in results_df.columns
        )

    def compute_zscore_for_group_agg(
        results_groups_perf: "SeriesGroupBy",
        stats_df: pandas.DataFrame,
        result_col: str,
        agg: Union[Literal["mean"], Literal["var"], Literal["std"]],
    ) -> None:
        """Add a ``<result_col>.<agg>_zscore`` column to stats_df in place."""
        results_groups_perf_aggs = results_groups_perf.agg(agg)  # TODO: avoid recalculating?
        # The per-row stats column for the standard deviation is named ".stddev".
        source_col = result_col + "." + ("stddev" if agg == "std" else agg)
        # Compute the zscore of the chosen aggregate performance of each group into
        # each row in the dataframe (relative to the spread across all groups).
        stats_df[result_col + f".{agg}_zscore"] = (
            stats_df[source_col] - results_groups_perf_aggs.mean()
        ) / results_groups_perf_aggs.std()

    augmented_results_df = results_df
    augmented_results_df["tunable_config_trial_group_size"] = results_groups["trial_id"].transform(
        "count"
    )

    timestamp_like = re.compile(r"(start|end).*time", flags=re.IGNORECASE)
    quantiles = [0.50, 0.75, 0.90, 0.95, 0.99]
    per_column_stats = []
    # Sorted so the order of the added columns is deterministic across runs.
    for result_col in sorted(result_cols):
        if timestamp_like.search(result_col):
            # Ignore computing variance on things that look like timestamps.
            continue
        if not is_numeric_dtype(results_df[result_col]):
            continue
        if results_df[result_col].unique().size == 1:
            continue
        results_groups_perf = results_groups[result_col]
        stats_df = pandas.DataFrame()
        stats_df[result_col + ".mean"] = results_groups_perf.transform("mean", numeric_only=True)
        stats_df[result_col + ".var"] = results_groups_perf.transform("var")
        stats_df[result_col + ".stddev"] = stats_df[result_col + ".var"] ** 0.5

        compute_zscore_for_group_agg(results_groups_perf, stats_df, result_col, "var")
        for quantile in quantiles:  # TODO: can we do this in one pass?
            quantile_col = f"{result_col}.p{int(quantile * 100)}"
            stats_df[quantile_col] = results_groups_perf.transform("quantile", quantile)
        per_column_stats.append(stats_df)

    if per_column_stats:
        # Single concat instead of re-copying the growing frame once per column.
        augmented_results_df = pandas.concat([augmented_results_df, *per_column_stats], axis=1)
    return augmented_results_df
def limit_top_n_configs(
    exp_data: Optional[ExperimentData] = None,
    *,
    results_df: Optional[pandas.DataFrame] = None,
    objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
    top_n_configs: int = 10,
    method: Literal["mean", "median", "p50", "p75", "p90", "p95", "p99"] = "mean",
) -> Tuple[pandas.DataFrame, List[int], Dict[str, bool]]:
    # pylint: disable=too-many-locals
    """
    Utility function to process the results and determine the best performing configs
    including potential repeats to help assess variability.

    Parameters
    ----------
    exp_data : Optional[ExperimentData]
        The ExperimentData (e.g., obtained from the storage layer) to operate on.
    results_df : Optional[pandas.DataFrame]
        The results dataframe to augment, by default None to use the results_df property.
    objectives : Optional[Dict[str, Literal["min", "max"]]]
        Which result column(s) to use for sorting the configs, and in which
        direction ("min" or "max").
        By default None to automatically select the experiment objectives.
    top_n_configs : int, optional
        How many of the best configs to select, by default 10.
        The default config is always placed first in the output, whether or
        not it ranks among the selected configs.
    method: Literal["mean", "median", "p50", "p75", "p90", "p95", "p99"] = "mean",
        Which statistical method to use when sorting the config groups before
        determining the cutoff, by default "mean".
        "median" is an alias for "p50".

    Returns
    -------
    (top_n_config_results_df, top_n_config_ids, orderby_cols) :
        Tuple[pandas.DataFrame, List[int], Dict[str, bool]]
        The filtered results dataframe, the config ids, and the columns used to
        order the configs.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported values.
    """
    # Do some input checking first.
    if method not in ["mean", "median", "p50", "p75", "p90", "p95", "p99"]:
        raise ValueError(f"Invalid method: {method}")
    # The augmented stats only provide a ".p50" column for the median, so
    # resolve the alias here rather than failing later with a KeyError.
    stat_suffix = "p50" if method == "median" else method

    # Prepare the orderby columns.
    (results_df, objs_cols) = expand_results_data_args(
        exp_data,
        results_df=results_df,
        objectives=objectives,
    )
    assert isinstance(results_df, pandas.DataFrame)

    # Augment the results dataframe with some useful stats.
    results_df = augment_results_df_with_config_trial_group_stats(
        exp_data=exp_data,
        results_df=results_df,
        requested_result_cols=objs_cols.keys(),
    )
    # Note: mypy seems to lose its mind for some reason and keeps forgetting that
    # results_df is not None and is in fact a DataFrame, so we periodically assert
    # it in this func for now.
    assert results_df is not None
    orderby_cols: Dict[str, bool] = {
        obj_col + f".{stat_suffix}": ascending for (obj_col, ascending) in objs_cols.items()
    }

    config_id_col = "tunable_config_id"
    group_id_col = "tunable_config_trial_group_id"  # first trial_id per config group
    trial_id_col = "trial_id"

    if exp_data is not None:
        default_config_id = exp_data.default_tunable_config_id
    else:
        # Without experiment metadata, fall back to the config used by the
        # earliest trial. Look up that trial's *config* id: the smallest
        # trial id itself is not a config id.
        first_trial_mask = results_df[trial_id_col] == results_df[trial_id_col].min()
        default_config_id = results_df.loc[first_trial_mask, config_id_col].iloc[0]
    assert default_config_id is not None, "Failed to determine default config id."

    # Filter out configs whose variance is too large.
    # But also make sure the default configs is still in the resulting dataframe
    # (for comparison purposes).
    for obj_col in objs_cols:
        assert results_df is not None
        group_sizes = results_df["tunable_config_trial_group_size"]
        # NOTE(review): for non-"mean" methods this keeps every multi-trial group
        # regardless of its variance zscore - confirm that this is intended.
        if method == "mean":
            singletons_mask = group_sizes == 1
        else:
            singletons_mask = group_sizes > 1
        results_df = results_df.loc[
            (results_df[f"{obj_col}.var_zscore"].abs() < 2)
            | singletons_mask
            | (results_df[config_id_col] == default_config_id)
        ]
    assert results_df is not None

    # Also, filter results that are worse than the default.
    default_config_results_df = results_df.loc[results_df[config_id_col] == default_config_id]
    for orderby_col, ascending in orderby_cols.items():
        default_vals = default_config_results_df[orderby_col].unique()
        assert len(default_vals) == 1
        default_val = default_vals[0]
        assert results_df is not None
        if ascending:
            results_df = results_df.loc[(results_df[orderby_col] <= default_val)]
        else:
            results_df = results_df.loc[(results_df[orderby_col] >= default_val)]

    # Now regroup and filter to the top-N configs by their group performance dimensions.
    assert results_df is not None
    orderby_col_names = list(orderby_cols)
    group_results_df: pandas.DataFrame = results_df.groupby(config_id_col).first()[
        orderby_col_names
    ]
    top_n_config_ids: List[int] = (
        group_results_df.sort_values(by=orderby_col_names, ascending=list(orderby_cols.values()))
        .head(top_n_configs)
        .index.tolist()
    )

    # Remove the default config if it's included. We'll add it back later.
    if default_config_id in top_n_config_ids:
        top_n_config_ids.remove(default_config_id)
    # Get just the top-n config results.
    # Sort by the group ids.
    top_n_config_results_df = results_df.loc[
        (results_df[config_id_col].isin(top_n_config_ids))
    ].sort_values([group_id_col, config_id_col, trial_id_col])
    # Place the default config at the top of the list.
    top_n_config_ids.insert(0, default_config_id)
    top_n_config_results_df = pandas.concat(
        [default_config_results_df, top_n_config_results_df],
        axis=0,
    )
    return (top_n_config_results_df, top_n_config_ids, orderby_cols)
def plot_optimizer_trends(
    exp_data: Optional[ExperimentData] = None,
    *,
    results_df: Optional[pandas.DataFrame] = None,
    objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
) -> None:
    """
    Plots the optimizer trends for the Experiment.

    For each objective, one figure is shown with a box plot of the results per
    config trial group, overlaid with a line of the best group mean seen so far
    (the incumbent), on a log-scaled y axis.

    Parameters
    ----------
    exp_data : ExperimentData
        The ExperimentData (e.g., obtained from the storage layer) to plot.
    results_df : Optional["pandas.DataFrame"]
        Optional results_df to plot.
        If not provided, defaults to exp_data.results_df property.
    objectives : Optional[Dict[str, Literal["min", "max"]]]
        Optional objectives to plot.
        If not provided, defaults to exp_data.objectives property.
    """
    (results_df, obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
    (results_df, groupby_columns, groupby_column) = _add_groupby_desc_column(results_df)

    # Build the title up front. Written inline as
    # `"prefix" + exp_id if exp_data is not None else ""`, the conditional
    # applies to the whole concatenation and yields an empty title when only
    # a results_df is given.
    title = "Optimizer Trends for Experiment"
    if exp_data is not None:
        title += ": " + exp_data.experiment_id

    for objective_column, ascending in obj_cols.items():
        incumbent_column = objective_column + ".incumbent"

        # Determine the mean of each config trial group to match the box plots.
        #
        # Note: technically the optimizer (usually) uses the *first* result for a
        # given config trial group before moving on to a new config (x-axis), so
        # plotting the mean may be slightly misleading when trying to understand the
        # actual path taken by the optimizer in case of high variance samples.
        # Using the first result of each group instead would be an alternative,
        # though it can also be misleading if the optimizer later gets a worse
        # value for that config group as well.
        group_results_df = (
            results_df.groupby(groupby_columns)[objective_column]
            .mean()
            .reset_index()
            .sort_values(groupby_columns)
        )

        # Calculate the incumbent (best seen so far)
        if ascending:
            group_results_df[incumbent_column] = group_results_df[objective_column].cummin()
        else:
            group_results_df[incumbent_column] = group_results_df[objective_column].cummax()

        (_fig, axis) = plt.subplots(figsize=(15, 5))

        # Result of each set of trials for a config
        sns.boxplot(
            data=results_df,
            x=groupby_column,
            y=objective_column,
            ax=axis,
        )

        # Results of the best so far.
        axis = sns.lineplot(
            data=group_results_df,
            x=groupby_column,
            y=incumbent_column,
            alpha=0.7,
            label="Mean of Incumbent Config Trial Group",
            ax=axis,
        )

        plt.yscale("log")
        plt.ylabel(objective_column.replace(ExperimentData.RESULT_COLUMN_PREFIX, ""))

        plt.xlabel("Config Trial Group ID, Config ID")
        plt.xticks(rotation=90, fontsize=8)

        plt.title(title)
        plt.grid()
        plt.show()  # type: ignore[no-untyped-call]
def plot_top_n_configs(
    exp_data: Optional[ExperimentData] = None,
    *,
    results_df: Optional[pandas.DataFrame] = None,
    objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
    with_scatter_plot: bool = False,
    **kwargs: Any,
) -> None:
    # pylint: disable=too-many-locals
    """
    Plots the top-N configs along with the default config for the given ExperimentData.

    Intended to be used from a Jupyter notebook.

    One violin plot figure (log-scaled y axis) is shown per ordering column
    returned by limit_top_n_configs.

    Parameters
    ----------
    exp_data: ExperimentData
        The experiment data to plot.
    results_df : Optional["pandas.DataFrame"]
        Optional results_df to plot.
        If not provided, defaults to exp_data.results_df property.
    objectives : Optional[Dict[str, Literal["min", "max"]]]
        Optional objectives to plot.
        If not provided, defaults to exp_data.objectives property.
    with_scatter_plot : bool
        Whether to also add scatter plot to the output figure.
    kwargs : dict
        Remaining keyword arguments are passed along to the limit_top_n_configs function.
        Names that limit_top_n_configs does not accept as keyword-only
        arguments are dropped.
    """
    (results_df, _obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
    top_n_config_args = _get_kwarg_defaults(limit_top_n_configs, **kwargs)
    top_n_config_args.setdefault("results_df", results_df)
    top_n_config_args.setdefault("objectives", objectives)
    (top_n_config_results_df, _top_n_config_ids, orderby_cols) = limit_top_n_configs(
        exp_data=exp_data,
        **top_n_config_args,
    )

    (top_n_config_results_df, _groupby_columns, groupby_column) = _add_groupby_desc_column(
        top_n_config_results_df,
    )
    # Don't count the default config, which is always part of the results.
    top_n = len(top_n_config_results_df[groupby_column].unique()) - 1

    for orderby_col, ascending in orderby_cols.items():
        opt_tgt = orderby_col.replace(ExperimentData.RESULT_COLUMN_PREFIX, "")
        (_fig, axis) = plt.subplots()
        sns.violinplot(
            data=top_n_config_results_df,
            x=groupby_column,
            y=orderby_col,
            ax=axis,
        )
        if with_scatter_plot:
            sns.scatterplot(
                data=top_n_config_results_df,
                x=groupby_column,
                y=orderby_col,
                legend=None,
                ax=axis,
            )
        plt.grid()
        (xticks, xlabels) = plt.xticks()
        # default should be in the first position based on top_n_configs() return
        xlabels[0] = "default"  # type: ignore[call-overload]
        plt.xticks(xticks, xlabels)  # type: ignore[arg-type]
        plt.xlabel("Config Trial Group, Config ID")
        plt.xticks(rotation=90)
        plt.ylabel(opt_tgt)
        plt.yscale("log")
        # An ascending sort order means the objective is being minimized.
        extra_title = "(lower is better)" if ascending else "(higher is better)"
        plt.title(f"Top {top_n} configs {opt_tgt} {extra_title}")
        plt.show()  # type: ignore[no-untyped-call]