Coverage for mlos_core/mlos_core/tests/optimizers/one_hot_test.py: 100%

47 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-10-07 01:52 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5"""Tests for one-hot encoding for certain optimizers.""" 

6 

7import ConfigSpace as CS 

8import numpy as np 

9import numpy.typing as npt 

10import pandas as pd 

11import pytest 

12 

13from mlos_core.optimizers import BaseOptimizer, SmacOptimizer 

14 

15# pylint: disable=protected-access,redefined-outer-name 

16 

17 

@pytest.fixture
def data_frame() -> pd.DataFrame:
    """
    Build the toy data frame matching the `configuration_space` hyperparameters.

    The columns are inserted in the order y, x, z, i.e. deliberately *not* in
    alphabetic order.
    """
    columns = {
        "y": ["a", "b", "c"],
        "x": [0.1, 0.2, 0.3],
        "z": [1, 5, 8],
    }
    return pd.DataFrame(columns)

32 

33 

@pytest.fixture
def one_hot_data_frame() -> npt.NDArray:
    """
    Expected one-hot encoding of the `data_frame` fixture.

    The columns follow the order of the hyperparameters in `configuration_space`;
    for the values below that is x, three indicator columns for y, then z.
    """
    encoded_rows = [
        [0.1, 1.0, 0.0, 0.0, 1.0],
        [0.2, 0.0, 1.0, 0.0, 5.0],
        [0.3, 0.0, 0.0, 1.0, 8.0],
    ]
    return np.array(encoded_rows)

48 

49 

@pytest.fixture
def series() -> pd.Series:
    """
    Build the toy series matching the `configuration_space` hyperparameters.

    The index labels are inserted in the order y, x, z, i.e. deliberately *not*
    in alphabetic order.
    """
    values = {
        "y": "b",
        "x": 0.4,
        "z": 3,
    }
    return pd.Series(values)

64 

65 

@pytest.fixture
def one_hot_series() -> npt.NDArray:
    """
    Expected one-hot encoding of the `series` fixture.

    The columns follow the order of the hyperparameters in `configuration_space`.
    The result is two-dimensional with a single row.
    """
    encoded_row = [0.4, 0.0, 1.0, 0.0, 3.0]
    return np.array([encoded_row])

78 

79 

@pytest.fixture
def optimizer(configuration_space: CS.ConfigurationSpace) -> BaseOptimizer:
    """
    Provide the optimizer used to exercise one-hot encoding/decoding.

    `configuration_space` is a fixture that is not defined in this part of the
    file (presumably provided by a shared conftest -- verify).
    """
    smac_optimizer = SmacOptimizer(
        parameter_space=configuration_space,
        optimization_targets=["score"],
    )
    return smac_optimizer

91 

92 

def test_to_1hot_data_frame(
    optimizer: BaseOptimizer,
    data_frame: pd.DataFrame,
    one_hot_data_frame: npt.NDArray,
) -> None:
    """Check that encoding the toy data frame yields the expected one-hot array."""
    encoded = optimizer._to_1hot(config=data_frame)
    assert encoded == pytest.approx(one_hot_data_frame)

100 

101 

def test_to_1hot_series(
    optimizer: BaseOptimizer,
    series: pd.Series,
    one_hot_series: npt.NDArray,
) -> None:
    """Check that encoding the toy series yields the expected one-hot array."""
    encoded = optimizer._to_1hot(config=series)
    assert encoded == pytest.approx(one_hot_series)

109 

110 

def test_from_1hot_data_frame(
    optimizer: BaseOptimizer,
    data_frame: pd.DataFrame,
    one_hot_data_frame: npt.NDArray,
) -> None:
    """Check that decoding the one-hot array reproduces the toy data frame."""
    decoded = optimizer._from_1hot(config=one_hot_data_frame)
    # Compare as plain dicts, so the check does not depend on column order.
    assert decoded.to_dict() == data_frame.to_dict()

118 

119 

def test_from_1hot_series(
    optimizer: BaseOptimizer,
    series: pd.Series,
    one_hot_series: npt.NDArray,
) -> None:
    """Check that decoding the single-row one-hot array reproduces the toy series."""
    decoded_df = optimizer._from_1hot(config=one_hot_series)
    row_count = decoded_df.shape[0]
    assert row_count == 1, f"Unexpected number of rows ({row_count} != 1)"
    # The only decoded row must match the original series entry for entry.
    first_row = decoded_df.iloc[0]
    assert first_row.to_dict() == series.to_dict()

129 

130 

def test_round_trip_data_frame(optimizer: BaseOptimizer, data_frame: pd.DataFrame) -> None:
    """Encode the toy data frame, decode it again, and compare column by column."""
    encoded = optimizer._to_1hot(config=data_frame)
    decoded = optimizer._from_1hot(config=encoded)
    # x holds floats, so it is compared approximately; y and z are compared exactly.
    assert decoded.x.to_numpy() == pytest.approx(data_frame.x)
    assert (decoded.y == data_frame.y).all()
    assert (decoded.z == data_frame.z).all()

137 

138 

def test_round_trip_series(optimizer: BaseOptimizer, series: pd.Series) -> None:
    """
    Round-trip test for one-hot-encoding and then decoding a series.

    Parameters
    ----------
    optimizer : BaseOptimizer
        Optimizer fixture providing `_to_1hot` and `_from_1hot`.
    series : pd.Series
        Toy configuration from the `series` fixture (a `pd.Series`, not a frame).
    """
    series_round_trip = optimizer._from_1hot(config=optimizer._to_1hot(config=series))
    # The decoded result is used as column-like data here (`.to_numpy()`, `.all()`),
    # while `series.x`, `series.y` and `series.z` are the scalar entries of the input.
    assert series_round_trip.x.to_numpy() == pytest.approx(series.x)
    assert (series_round_trip.y == series.y).all()
    assert (series_round_trip.z == series.z).all()

145 

146 

def test_round_trip_reverse_data_frame(
    optimizer: BaseOptimizer,
    one_hot_data_frame: npt.NDArray,
) -> None:
    """Decode the multi-row one-hot array, encode it again, and expect the same array."""
    decoded = optimizer._from_1hot(config=one_hot_data_frame)
    re_encoded = optimizer._to_1hot(config=decoded)
    assert re_encoded == pytest.approx(one_hot_data_frame)

154 

155 

def test_round_trip_reverse_series(optimizer: BaseOptimizer, one_hot_series: npt.NDArray) -> None:
    """Decode the single-row one-hot array, encode it again, and expect the same array."""
    decoded = optimizer._from_1hot(config=one_hot_series)
    re_encoded = optimizer._to_1hot(config=decoded)
    assert re_encoded == pytest.approx(one_hot_series)

159 assert round_trip == pytest.approx(one_hot_series)