Coverage for mlos_viz/mlos_viz/base.py: 90%

154 statements  

« prev     ^ index     » next       coverage.py v7.6.9, created at 2024-12-14 01:58 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5"""Base functions for visualizing, explain, and gain insights from results.""" 

6 

7import re 

8import warnings 

9from importlib.metadata import version 

10from typing import Any, Callable, Dict, Iterable, List, Literal, Optional, Tuple, Union 

11 

12import pandas 

13import seaborn as sns 

14from matplotlib import pyplot as plt 

15from pandas.api.types import is_numeric_dtype 

16from pandas.core.groupby.generic import SeriesGroupBy 

17 

18from mlos_bench.storage.base_experiment_data import ExperimentData 

19from mlos_viz.util import expand_results_data_args 

20 

21_SEABORN_VERS = version("seaborn") 

22 

23 

24def _get_kwarg_defaults(target: Callable, **kwargs: Any) -> Dict[str, Any]: 

25 """ 

26 Assembles a smaller kwargs dict for the specified target function. 

27 

28 Note: this only works with non-positional kwargs (e.g., those after a * arg). 

29 """ 

30 target_kwargs = {} 

31 for kword in target.__kwdefaults__: # or {} # intentionally omitted for now 

32 if kword in kwargs: 

33 target_kwargs[kword] = kwargs[kword] 

34 return target_kwargs 

35 

36 

def _parse_version_tuple(vers: str) -> Tuple[int, ...]:
    """
    Convert the leading numeric release part of a version string to a tuple of ints.

    E.g., "0.13.1" -> (0, 13, 1) and "0.13.1rc1" -> (0, 13, 1).
    Returns an empty tuple if the string does not start with a number.
    """
    release = re.match(r"\d+(?:\.\d+)*", vers)
    if release is None:
        return ()
    return tuple(int(part) for part in release.group(0).split("."))


def ignore_plotter_warnings() -> None:
    """Suppress some annoying warnings from third-party data visualization packages by
    adding them to the warnings filter.

    All ``FutureWarning`` warnings are ignored, and for seaborn versions up to and
    including 0.13.1 the ``is_categorical_dtype`` ``DeprecationWarning`` is ignored
    as well.
    """
    warnings.filterwarnings("ignore", category=FutureWarning)
    # Compare versions numerically: plain string comparison orders them
    # lexicographically (e.g., "0.9.0" > "0.13.1"), which gives the wrong answer.
    if _parse_version_tuple(_SEABORN_VERS) <= (0, 13, 1):
        warnings.filterwarnings(
            "ignore",
            category=DeprecationWarning,
            module="seaborn",  # but actually comes from pandas
            message="is_categorical_dtype is deprecated and will be removed in a future version.",
        )

49 

50 

51def _add_groupby_desc_column( 

52 results_df: pandas.DataFrame, 

53 groupby_columns: Optional[List[str]] = None, 

54) -> Tuple[pandas.DataFrame, List[str], str]: 

55 """ 

56 Adds a group descriptor column to the results_df. 

57 

58 Parameters 

59 ---------- 

60 results_df: ExperimentData 

61 The experiment data to add the descriptor column to. 

62 groupby_columns: Optional[List[str]] 

63 """ 

64 # Compose a new groupby_column for display purposes that is the 

65 # concatenation of the min trial_id (the first one) of each config trial 

66 # group and the config_id. 

67 # Note: It's need to be a string (e.g., categorical) for boxplot and lineplot to 

68 # be on the same axis anyways. 

69 if groupby_columns is None: 

70 groupby_columns = ["tunable_config_trial_group_id", "tunable_config_id"] 

71 groupby_column = ",".join(groupby_columns) 

72 results_df[groupby_column] = ( 

73 results_df[groupby_columns].astype(str).apply(",".join, axis=1) 

74 ) # pylint: disable=unnecessary-lambda 

75 groupby_columns.append(groupby_column) 

76 return (results_df, groupby_columns, groupby_column) 

77 

78 

def augment_results_df_with_config_trial_group_stats(
    exp_data: Optional[ExperimentData] = None,
    *,
    results_df: Optional[pandas.DataFrame] = None,
    requested_result_cols: Optional[Iterable[str]] = None,
) -> pandas.DataFrame:
    # pylint: disable=too-complex
    """
    Add a number of useful statistical measure columns to the results dataframe.

    In particular, for each numeric result, we add the following columns for each
    requested result column:

    - ".p50": the median of each config trial group results

    - ".p75": the p75 of each config trial group results

    - ".p90": the p90 of each config trial group results

    - ".p95": the p95 of each config trial group results

    - ".p99": the p99 of each config trial group results

    - ".mean": the mean of each config trial group results

    - ".stddev": the standard deviation of each config trial group results

    - ".var": the variance of each config trial group results

    - ".var_zscore": the zscore of this group (i.e., variance relative to the stddev
      of all group variances). This can be useful for filtering out outliers (e.g.,
      configs with high variance relative to others by restricting to abs < 2 to
      remove those two standard deviations from the mean variance across all config
      trial groups).

    Additionally, we add a "tunable_config_trial_group_size" column that indicates
    the number of trials using a particular config.

    Result columns that look like start/end timestamps, are not numeric, or only
    contain a single unique value are skipped.

    Parameters
    ----------
    exp_data : ExperimentData
        The ExperimentData (e.g., obtained from the storage layer) to plot.
    results_df : Optional[pandas.DataFrame]
        The results dataframe to augment, by default None to use the results_df property.
        Note: the "tunable_config_trial_group_size" column is assigned onto this
        dataframe in place.
    requested_result_cols : Optional[Iterable[str]]
        Which results columns to augment, by default None to use all results columns
        that look numeric.
        Names may be given with or without the result column prefix.

    Returns
    -------
    pandas.DataFrame
        The augmented results dataframe.

    Raises
    ------
    ValueError
        If neither exp_data nor results_df is provided, or if the results contain
        no more than one "tunable_config_id" group.
    """
    if results_df is None:
        if exp_data is None:
            raise ValueError("Either exp_data or results_df must be provided.")
        results_df = exp_data.results_df
    results_groups = results_df.groupby("tunable_config_id")
    if len(results_groups) <= 1:
        raise ValueError(f"Not enough data: {len(results_groups)}")

    result_col_prefix = ExperimentData.RESULT_COLUMN_PREFIX
    if requested_result_cols is None:
        result_cols = {col for col in results_df.columns if col.startswith(result_col_prefix)}
    else:
        # Materialize first: the requested columns are scanned twice below and
        # may have been provided as a one-shot iterable.
        requested_cols = list(requested_result_cols)
        # Requested names that already carry the result column prefix.
        result_cols = {
            col
            for col in requested_cols
            if col.startswith(result_col_prefix) and col in results_df.columns
        }
        # Requested names given without the prefix: accept them if the prefixed
        # column exists in the dataframe.
        result_cols.update(
            result_col_prefix + col
            for col in requested_cols
            if result_col_prefix + col in results_df.columns
        )

    def compute_zscore_for_group_agg(
        results_groups_perf: "SeriesGroupBy",
        stats_df: pandas.DataFrame,
        result_col: str,
        agg: Union[Literal["mean"], Literal["var"], Literal["std"]],
    ) -> None:
        """
        Add a ``{result_col}.{agg}_zscore`` column to stats_df (in place).

        Expects stats_df to already contain the ``{result_col}.{agg}`` column.
        """
        results_groups_perf_aggs = results_groups_perf.agg(agg)  # TODO: avoid recalculating?
        # Compute the zscore of the chosen aggregate performance of each group into
        # each row in the dataframe.
        agg_col = f"{result_col}.{agg}"
        stats_df[agg_col + "_mean"] = results_groups_perf_aggs.mean()
        stats_df[agg_col + "_stddev"] = results_groups_perf_aggs.std()
        stats_df[agg_col + "_zscore"] = (stats_df[agg_col] - stats_df[agg_col + "_mean"]) / stats_df[
            agg_col + "_stddev"
        ]
        # The intermediate columns were only needed to compute the zscore.
        stats_df.drop(columns=[agg_col + "_mean", agg_col + "_stddev"], inplace=True)

    augmented_results_df = results_df
    augmented_results_df["tunable_config_trial_group_size"] = results_groups["trial_id"].transform(
        "count"
    )
    quantiles = [0.50, 0.75, 0.90, 0.95, 0.99]
    # Sorted so that the order of the added columns is deterministic.
    for result_col in sorted(result_cols):
        if not result_col.startswith(result_col_prefix):
            continue
        if re.search(r"(start|end).*time", result_col, flags=re.IGNORECASE):
            # Ignore computing variance on things that look like timestamps.
            continue
        if not is_numeric_dtype(results_df[result_col]):
            continue
        if results_df[result_col].unique().size == 1:
            # Skip columns that hold the same value everywhere.
            continue
        results_groups_perf = results_groups[result_col]
        stats_df = pandas.DataFrame()
        stats_df[result_col + ".mean"] = results_groups_perf.transform("mean", numeric_only=True)
        stats_df[result_col + ".var"] = results_groups_perf.transform("var")
        stats_df[result_col + ".stddev"] = stats_df[result_col + ".var"] ** 0.5

        compute_zscore_for_group_agg(results_groups_perf, stats_df, result_col, "var")
        for quantile in quantiles:  # TODO: can we do this in one pass?
            quantile_col = f"{result_col}.p{int(quantile * 100)}"
            stats_df[quantile_col] = results_groups_perf.transform("quantile", quantile)
        augmented_results_df = pandas.concat([augmented_results_df, stats_df], axis=1)
    return augmented_results_df

205 

206 

def limit_top_n_configs(
    exp_data: Optional[ExperimentData] = None,
    *,
    results_df: Optional[pandas.DataFrame] = None,
    objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
    top_n_configs: int = 10,
    method: Literal["mean", "median", "p50", "p75", "p90", "p95", "p99"] = "mean",
) -> Tuple[pandas.DataFrame, List[int], Dict[str, bool]]:
    # pylint: disable=too-many-locals
    """
    Utility function to process the results and determine the best performing configs
    including potential repeats to help assess variability.

    Parameters
    ----------
    exp_data : Optional[ExperimentData]
        The ExperimentData (e.g., obtained from the storage layer) to operate on.
    results_df : Optional[pandas.DataFrame]
        The results dataframe to augment, by default None to use
        :py:attr:`.ExperimentData.results_df` property.
    objectives : Optional[Dict[str, Literal["min", "max"]]]
        Which result column(s) to use for sorting the configs, and in which
        direction ("min" or "max").
        By default None to automatically select the :py:attr:`.ExperimentData.objectives`.
    top_n_configs : int
        How many configs to return, including the default, by default 10.
    method: Literal["mean", "median", "p50", "p75", "p90", "p95", "p99"] = "mean",
        Which statistical method to use when sorting the config groups before
        determining the cutoff, by default "mean".
        "median" is an alias for "p50".

    Returns
    -------
    (top_n_config_results_df, top_n_config_ids, orderby_cols) :
        Tuple[pandas.DataFrame, List[int], Dict[str, bool]]
        The filtered results dataframe, the config ids, and the columns used to
        order the configs.

    Raises
    ------
    ValueError
        If the method is not one of the supported ones.
    """
    # Do some input checking first.
    if method not in ["mean", "median", "p50", "p75", "p90", "p95", "p99"]:
        raise ValueError(f"Invalid method: {method}")
    # The augmented stats only provide the median as the ".p50" column, so treat
    # "median" as an alias for it rather than looking up a missing ".median" column.
    stat_suffix = "p50" if method == "median" else method

    # Prepare the orderby columns.
    (results_df, objs_cols) = expand_results_data_args(
        exp_data,
        results_df=results_df,
        objectives=objectives,
    )
    assert isinstance(results_df, pandas.DataFrame)

    # Augment the results dataframe with some useful stats.
    results_df = augment_results_df_with_config_trial_group_stats(
        exp_data=exp_data,
        results_df=results_df,
        requested_result_cols=objs_cols.keys(),
    )
    # Note: mypy seems to lose its mind for some reason and keeps forgetting that
    # results_df is not None and is in fact a DataFrame, so we periodically assert
    # it in this func for now.
    assert results_df is not None
    orderby_cols: Dict[str, bool] = {
        obj_col + f".{stat_suffix}": ascending for (obj_col, ascending) in objs_cols.items()
    }

    config_id_col = "tunable_config_id"
    group_id_col = "tunable_config_trial_group_id"  # first trial_id per config group
    trial_id_col = "trial_id"

    # NOTE(review): without exp_data this falls back to the smallest trial_id and
    # then uses it as a config id below - confirm that is the intended default.
    default_config_id = (
        results_df[trial_id_col].min() if exp_data is None else exp_data.default_tunable_config_id
    )
    assert default_config_id is not None, "Failed to determine default config id."

    # Filter out configs whose variance is too large.
    # But also make sure the default configs is still in the resulting dataframe
    # (for comparison purposes).
    for obj_col in objs_cols:
        assert results_df is not None
        # Rows matching this mask are kept regardless of their variance zscore:
        # single trial groups for "mean", multi trial groups for the other methods.
        if method == "mean":
            singletons_mask = results_df["tunable_config_trial_group_size"] == 1
        else:
            singletons_mask = results_df["tunable_config_trial_group_size"] > 1
        results_df = results_df.loc[
            (
                (results_df[f"{obj_col}.var_zscore"].abs() < 2)
                | (singletons_mask)
                | (results_df[config_id_col] == default_config_id)
            )
        ]
    assert results_df is not None

    # Also, filter results that are worse than the default.
    default_config_results_df = results_df.loc[results_df[config_id_col] == default_config_id]
    for orderby_col, ascending in orderby_cols.items():
        default_vals = default_config_results_df[orderby_col].unique()
        assert len(default_vals) == 1
        default_val = default_vals[0]
        assert results_df is not None
        if ascending:
            results_df = results_df.loc[(results_df[orderby_col] <= default_val)]
        else:
            results_df = results_df.loc[(results_df[orderby_col] >= default_val)]

    # Now regroup and filter to the top-N configs by their group performance dimensions.
    assert results_df is not None
    group_results_df: pandas.DataFrame = results_df.groupby(config_id_col).first()[
        list(orderby_cols)
    ]
    top_n_config_ids: List[int] = (
        group_results_df.sort_values(
            by=list(orderby_cols.keys()), ascending=list(orderby_cols.values())
        )
        .head(top_n_configs)
        .index.tolist()
    )

    # Remove the default config if it's included. We'll add it back later.
    if default_config_id in top_n_config_ids:
        top_n_config_ids.remove(default_config_id)
    # Get just the top-n config results.
    # Sort by the group ids.
    top_n_config_results_df = results_df.loc[
        (results_df[config_id_col].isin(top_n_config_ids))
    ].sort_values([group_id_col, config_id_col, trial_id_col])
    # Place the default config at the top of the list.
    top_n_config_ids.insert(0, default_config_id)
    top_n_config_results_df = pandas.concat(
        [default_config_results_df, top_n_config_results_df],
        axis=0,
    )
    return (top_n_config_results_df, top_n_config_ids, orderby_cols)

337 

338 

def plot_optimizer_trends(
    exp_data: Optional[ExperimentData] = None,
    *,
    results_df: Optional[pandas.DataFrame] = None,
    objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
) -> None:
    """
    Draws one optimizer trend figure per objective of the Experiment.

    Each figure shows a box plot of the results of every config trial group,
    overlaid with a line for the incumbent (best so far) of the group means.

    Parameters
    ----------
    exp_data : ExperimentData
        The ExperimentData (e.g., obtained from the storage layer) to plot.
    results_df : Optional[pandas.DataFrame]
        Optional results_df to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.results_df` property.
    objectives : Optional[Dict[str, Literal["min", "max"]]]
        Optional objectives to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.objectives` property.
    """
    (results_df, obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
    (results_df, groupby_columns, groupby_column) = _add_groupby_desc_column(results_df)

    for objective_column, ascending in obj_cols.items():
        incumbent_column = objective_column + ".incumbent"

        # Use the mean of each config trial group so the line matches the box plots.
        #
        # Note: the optimizer (usually) acts on the *first* result of a config
        # trial group before moving on to the next config (x-axis), so the mean can
        # be slightly misleading about the actual path it took when samples have
        # high variance. Plotting the first result per group instead would show
        # that path, though it can mislead too if a later result for the same
        # group turns out worse.
        per_group_means = results_df.groupby(groupby_columns)[objective_column].mean()
        group_results_df = per_group_means.reset_index().sort_values(groupby_columns)

        # Running best value seen so far (the incumbent), per the objective direction.
        group_objective = group_results_df[objective_column]
        group_results_df[incumbent_column] = (
            group_objective.cummin() if ascending else group_objective.cummax()
        )

        (_fig, axis) = plt.subplots(figsize=(15, 5))

        # Distribution of the trial results for each config.
        sns.boxplot(data=results_df, x=groupby_column, y=objective_column, ax=axis)

        # Incumbent trend line on top of the boxes.
        sns.lineplot(
            data=group_results_df,
            x=groupby_column,
            y=incumbent_column,
            alpha=0.7,
            label="Mean of Incumbent Config Trial Group",
            ax=axis,
        )

        plt.yscale("log")
        plt.ylabel(objective_column.replace(ExperimentData.RESULT_COLUMN_PREFIX, ""))

        plt.xlabel("Config Trial Group ID, Config ID")
        plt.xticks(rotation=90, fontsize=8)

        # The title is left empty when there is no ExperimentData to name.
        if exp_data is not None:
            title = "Optimizer Trends for Experiment: " + exp_data.experiment_id
        else:
            title = ""
        plt.title(title)
        plt.grid()
        plt.show()

422 

423 

def plot_top_n_configs(
    exp_data: Optional[ExperimentData] = None,
    *,
    results_df: Optional[pandas.DataFrame] = None,
    objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
    with_scatter_plot: bool = False,
    **kwargs: Any,
) -> None:
    # pylint: disable=too-many-locals
    """
    Plots the top-N configs along with the default config for the given
    :py:class:`.ExperimentData`.

    Intended to be used from a Jupyter notebook.

    Parameters
    ----------
    exp_data: ExperimentData
        The experiment data to plot.
    results_df : Optional[pandas.DataFrame]
        Optional results_df to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.results_df` property.
    objectives : Optional[Dict[str, Literal["min", "max"]]]
        Optional objectives to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.objectives` property.
    with_scatter_plot : bool
        Whether to also add scatter plot to the output figure.
    kwargs : dict
        Remaining keyword arguments are passed along to the
        :py:func:`limit_top_n_configs` function.
    """
    (results_df, _obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
    # Only forward the kwargs that limit_top_n_configs accepts as keyword-only args.
    top_n_config_args = _get_kwarg_defaults(limit_top_n_configs, **kwargs)
    if "results_df" not in top_n_config_args:
        top_n_config_args["results_df"] = results_df
    if "objectives" not in top_n_config_args:
        top_n_config_args["objectives"] = objectives
    (top_n_config_results_df, _top_n_config_ids, orderby_cols) = limit_top_n_configs(
        exp_data=exp_data,
        **top_n_config_args,
    )

    (top_n_config_results_df, _groupby_columns, groupby_column) = _add_groupby_desc_column(
        top_n_config_results_df,
    )
    # Don't count the default config, which is always included.
    top_n = len(top_n_config_results_df[groupby_column].unique()) - 1

    for orderby_col, ascending in orderby_cols.items():
        opt_tgt = orderby_col.replace(ExperimentData.RESULT_COLUMN_PREFIX, "")
        (_fig, axis) = plt.subplots()
        sns.violinplot(
            data=top_n_config_results_df,
            x=groupby_column,
            y=orderby_col,
            ax=axis,
        )
        if with_scatter_plot:
            sns.scatterplot(
                data=top_n_config_results_df,
                x=groupby_column,
                y=orderby_col,
                legend=None,
                ax=axis,
            )
        plt.grid()
        (xticks, xlabels) = plt.xticks()
        # default should be in the first position based on top_n_configs() return
        xlabels[0] = "default"  # type: ignore[call-overload]
        plt.xticks(xticks, xlabels)  # type: ignore[arg-type]
        plt.xlabel("Config Trial Group, Config ID")
        plt.xticks(rotation=90)
        plt.ylabel(opt_tgt)
        plt.yscale("log")
        # Ascending order means the objective is minimized, otherwise maximized.
        extra_title = "(lower is better)" if ascending else "(higher is better)"
        plt.title(f"Top {top_n} configs {opt_tgt} {extra_title}")
        plt.show()

498 plt.title(f"Top {top_n} configs {opt_tgt} {extra_title}") 

499 plt.show()