Coverage for mlos_bench/mlos_bench/storage/util.py: 89%

18 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-10-07 01:52 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5"""Utility functions for the storage subsystem.""" 

6 

7from typing import Dict, Optional 

8 

9import pandas 

10 

11from mlos_bench.tunables.tunable import TunableValue, TunableValueTypeTuple 

12from mlos_bench.util import try_parse_val 

13 

14 

def kv_df_to_dict(dataframe: pandas.DataFrame) -> Dict[str, Optional[TunableValue]]:
    """
    Convert a flat two-column key-value dataframe, as used by the mlos_bench.storage
    modules, into a plain dict.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        A dataframe with exactly two columns, in this order: 'parameter' (or
        'metric') and 'value'. Keys must be strings; values must be instances of
        one of the types in TunableValueTypeTuple. The input dataframe is not
        modified.

    Returns
    -------
    Dict[str, Optional[TunableValue]]
        A mapping of each key to its value. String values are passed through
        `try_parse_val`; all other values are stored unchanged.

    Raises
    ------
    AssertionError
        If the columns are not exactly ['parameter', 'value'] (after the optional
        'metric' rename), or if a key is not a string.
    TypeError
        If a value is not an instance of one of the types in TunableValueTypeTuple.
    ValueError
        If the same key appears in more than one row.
    """
    if dataframe.columns.tolist() == ["metric", "value"]:
        # Non-inplace rename returns a new frame, so the caller's data is untouched.
        dataframe = dataframe.rename(columns={"metric": "parameter"})
    assert dataframe.columns.tolist() == ["parameter", "value"]

    # Cast to object dtype so the per-row type checks below see boxed objects
    # rather than numpy scalars.
    boxed = dataframe.astype("O")
    result = {}
    for key, val in zip(boxed["parameter"], boxed["value"]):
        if not isinstance(val, TunableValueTypeTuple):
            raise TypeError(f"Invalid column type: {type(val)} value: {val}")
        assert isinstance(key, str)
        if key in result:
            raise ValueError(f"Duplicate parameter '{key}' in dataframe")
        if isinstance(val, str):
            val = try_parse_val(val)
        result[key] = val
    return result