Coverage for mlos_bench/mlos_bench/storage/util.py: 88%

17 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-21 01:50 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5"""Utility functions for the storage subsystem.""" 

6 

7 

8import pandas 

9 

10from mlos_bench.tunables.tunable import TunableValue, TunableValueTypeTuple 

11from mlos_bench.util import try_parse_val 

12 

13 

def kv_df_to_dict(dataframe: pandas.DataFrame) -> dict[str, TunableValue | None]:
    """
    Flatten a two-column key/value dataframe into a plain dict.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        A dataframe whose columns are exactly ['parameter', 'value'] or
        ['metric', 'value'] (in that order). The key column must hold strings
        and the 'value' column must hold instances of TunableValueTypeTuple.

    Returns
    -------
    dict[str, TunableValue | None]
        One entry per row, keyed by the 'parameter' (or 'metric') column.
        String values are passed through try_parse_val(); all other values
        are stored unchanged.

    Raises
    ------
    TypeError
        If a value is not an instance of TunableValueTypeTuple.
    ValueError
        If the same key appears in more than one row.
    AssertionError
        If the columns are not as described above or a key is not a string.
    """
    expected_columns = ["parameter", "value"]
    if dataframe.columns.tolist() == ["metric", "value"]:
        # rename() without inplace returns a new frame, so the caller's
        # dataframe is left untouched.
        dataframe = dataframe.rename(columns={"metric": "parameter"})
    assert dataframe.columns.tolist() == expected_columns

    result = {}
    # Cast to object dtype first so each cell is checked as a plain object.
    for name, value in dataframe.astype("O").itertuples(index=False, name=None):
        if not isinstance(value, TunableValueTypeTuple):
            raise TypeError(f"Invalid column type: {type(value)} value: {value}")
        assert isinstance(name, str)
        if name in result:
            raise ValueError(f"Duplicate parameter '{name}' in dataframe")
        result[name] = try_parse_val(value) if isinstance(value, str) else value
    return result