Coverage for mlos_viz/mlos_viz/util.py: 78%

23 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-10-07 01:52 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5"""Utility functions for manipulating experiment results data.""" 

6from typing import Dict, Literal, Optional, Tuple 

7 

8import pandas 

9 

10from mlos_bench.storage.base_experiment_data import ExperimentData 

11 

12 

def expand_results_data_args(
    exp_data: Optional[ExperimentData] = None,
    results_df: Optional[pandas.DataFrame] = None,
    objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
) -> Tuple[pandas.DataFrame, Dict[str, bool]]:
    """
    Resolve the results dataframe and the objective columns' sort directions.

    Any argument left as None is filled in from ``exp_data``. Each objective
    name is then matched against the columns of the results dataframe, either
    as given (when it already carries ``ExperimentData.RESULT_COLUMN_PREFIX``)
    or with that prefix prepended.

    Used by mlos_viz as well.

    Parameters
    ----------
    exp_data : Optional[ExperimentData], optional
        ExperimentData to operate on.
        Required whenever results_df or objectives is omitted.
    results_df : Optional[pandas.DataFrame], optional
        Results dataframe to use.
        Defaults to the exp_data.results_df property.
    objectives : Optional[Dict[str, Literal["min", "max"]]], optional
        Mapping of optimization target name to its direction.
        Defaults to the exp_data.objectives property.

    Returns
    -------
    Tuple[pandas.DataFrame, Dict[str, bool]]
        A copy of the results dataframe, and a dict mapping each objective's
        column name in that dataframe to True when it should be sorted in
        ascending order ("min") or False otherwise ("max").

    Raises
    ------
    ValueError
        If exp_data is None while results_df or objectives is also None, or
        if an objective's direction is neither "min" nor "max".
    UserWarning
        If an objective does not match any column of the results dataframe.
    """
    missing_args_msg = "Must provide either exp_data or both results_df and objectives."

    # Fall back to the experiment data for whichever inputs were not supplied.
    if results_df is None:
        if exp_data is None:
            raise ValueError(missing_args_msg)
        results_df = exp_data.results_df

    if objectives is None:
        if exp_data is None:
            raise ValueError(missing_args_msg)
        objectives = exp_data.objectives

    prefix = ExperimentData.RESULT_COLUMN_PREFIX
    sort_ascending: Dict[str, bool] = {}
    for target, direction in objectives.items():
        if direction not in ("min", "max"):
            raise ValueError(f"Unexpected optimization direction for target {target}: {direction}")

        # Candidate column names in priority order: the name as given (only if
        # it is already prefixed), then the name with the prefix prepended.
        candidates = [prefix + target]
        if target.startswith(prefix):
            candidates.insert(0, target)

        column = next((name for name in candidates if name in results_df.columns), None)
        if column is None:
            raise UserWarning(f"{target} is not a result column for experiment {exp_data}")
        sort_ascending[column] = direction == "min"

    # Note: these copies are important to avoid issues with downstream consumers.
    # It is more efficient to copy the dataframe than to go back to the original data source.
    # TODO: However, it should be possible to later fixup the downstream consumers
    # (which are currently still internal to mlos-viz) to make their own data
    # sources if necessary. That will of course need tests.
    return (results_df.copy(), dict(sort_ascending))