Coverage for mlos_core/mlos_core/optimizers/flaml_optimizer.py: 96%

57 statements  


#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Contains the FlamlOptimizer class."""

from typing import Dict, List, NamedTuple, Optional, Tuple, Union
from warnings import warn

import ConfigSpace
import numpy as np
import pandas as pd

from mlos_core.optimizers.optimizer import BaseOptimizer
from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
from mlos_core.util import drop_nulls, normalize_config


class EvaluatedSample(NamedTuple):
    """A named tuple representing a sample that has been evaluated."""

    config: dict
    score: float


class FlamlOptimizer(BaseOptimizer):
    """Wrapper class for FLAML Optimizer: A fast library for AutoML and tuning."""

    # The name of an internal objective attribute that is calculated as a weighted
    # average of the user-provided objective metrics.
    _METRIC_NAME = "FLAML_score"

    def __init__(
        self,
        *,  # pylint: disable=too-many-arguments
        parameter_space: ConfigSpace.ConfigurationSpace,
        optimization_targets: List[str],
        objective_weights: Optional[List[float]] = None,
        space_adapter: Optional[BaseSpaceAdapter] = None,
        low_cost_partial_config: Optional[dict] = None,
        seed: Optional[int] = None,
    ):
        """
        Create an MLOS wrapper for FLAML.

        Parameters
        ----------
        parameter_space : ConfigSpace.ConfigurationSpace
            The parameter space to optimize.

        optimization_targets : List[str]
            The names of the optimization targets to minimize.

        objective_weights : Optional[List[float]]
            Optional list of weights of optimization targets.

        space_adapter : BaseSpaceAdapter
            The space adapter class to employ for parameter space transformations.

        low_cost_partial_config : dict
            A dictionary from a subset of controlled dimensions to the initial low-cost values.
            More info:
            https://microsoft.github.io/FLAML/docs/FAQ#about-low_cost_partial_config-in-tune

        seed : Optional[int]
            If provided, calls np.random.seed() with the provided value to set the
            seed globally at init.
        """
        super().__init__(
            parameter_space=parameter_space,
            optimization_targets=optimization_targets,
            objective_weights=objective_weights,
            space_adapter=space_adapter,
        )

        # Per upstream documentation, it is recommended to set the seed for
        # flaml at the start of its operation globally.
        if seed is not None:
            np.random.seed(seed)

        # pylint: disable=import-outside-toplevel
        from mlos_core.spaces.converters.flaml import (
            FlamlDomain,
            configspace_to_flaml_space,
        )

        self.flaml_parameter_space: Dict[str, FlamlDomain] = configspace_to_flaml_space(
            self.optimizer_parameter_space
        )
        self.low_cost_partial_config = low_cost_partial_config
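        # For example (illustrative values only), low_cost_partial_config={"n_estimators": 4}
        # would tell FLAML to begin its search from a cheap point in a space that has
        # an "n_estimators" dimension; see the FLAML FAQ link in the docstring above.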

        self.evaluated_samples: Dict[ConfigSpace.Configuration, EvaluatedSample] = {}
        self._suggested_config: Optional[dict]

    def _register(
        self,
        *,
        configs: pd.DataFrame,
        scores: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
    ) -> None:
        """
        Registers the given configs and scores.

        Parameters
        ----------
        configs : pd.DataFrame
            Dataframe of configs / parameters. The columns are parameter names and
            the rows are the configs.

        scores : pd.DataFrame
            Scores from running the configs. The index is the same as the index of the configs.

        context : None
            Not Yet Implemented.

        metadata : None
            Not Yet Implemented.
        """
        if context is not None:
            warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
        if metadata is not None:
            warn(f"Not Implemented: Ignoring metadata {list(metadata.columns)}", UserWarning)

        for (_, config), (_, score) in zip(configs.astype("O").iterrows(), scores.iterrows()):
            # Remove None values for inactive config parameters
            config_dict = drop_nulls(config.to_dict())
            cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration(
                self.optimizer_parameter_space,
                values=config_dict,
            )
            if cs_config in self.evaluated_samples:
                warn(f"Configuration {config} was already registered", UserWarning)
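            # Scalarize (possibly multi-objective) scores into the single internal
            # FLAML metric via a weighted average; when objective_weights is None,
            # np.average weights all optimization targets equally.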
            self.evaluated_samples[cs_config] = EvaluatedSample(
                config=config_dict,
                score=float(np.average(score.astype(float), weights=self._objective_weights)),
            )

    def _suggest(
        self,
        *,
        context: Optional[pd.DataFrame] = None,
    ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
        """
        Suggests a new configuration.

        The suggestion is obtained by warm-starting a fresh FLAML tuning run with
        all previously registered samples (see `_get_next_config`).

        Parameters
        ----------
        context : None
            Not Yet Implemented.

        Returns
        -------
        configuration : pd.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.

        metadata : None
            Not implemented.
        """
        if context is not None:
            warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
        config: dict = self._get_next_config()
        return pd.DataFrame(config, index=[0]), None

    def register_pending(
        self,
        *,
        configs: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
    ) -> None:
        raise NotImplementedError()

    def _target_function(self, config: dict) -> Union[dict, None]:
        """
        Configuration evaluation function called by the FLAML optimizer.

        FLAML may suggest the same configuration multiple times (due to its
        warm-start mechanism). Once FLAML suggests an unseen configuration, we
        store it and stop the optimization process.

        Parameters
        ----------
        config : dict
            Next configuration to be evaluated, as suggested by FLAML.
            This config is stored internally and is returned to the user via the
            `.suggest()` method.

        Returns
        -------
        result : Union[dict, None]
            Dictionary with a single key, `FLAML_score`, if the config has already
            been evaluated; `None` otherwise.
        """
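        # Normalize the FLAML-suggested config into a canonical
        # ConfigSpace.Configuration so that repeated suggestions of the same point
        # map to the same dictionary key in self.evaluated_samples.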
        cs_config = normalize_config(self.optimizer_parameter_space, config)
        if cs_config in self.evaluated_samples:
            return {self._METRIC_NAME: self.evaluated_samples[cs_config].score}

        self._suggested_config = dict(cs_config)  # Cleaned-up version of the config
        return None  # Returning None stops the process

    def _get_next_config(self) -> dict:
        """
        Warm-starts a new instance of FLAML and returns a recommended, previously
        unseen configuration.

        Since FLAML does not provide an ask-and-tell interface, we need to create a
        new instance of FLAML each time we are asked for a new suggestion. This is
        suboptimal performance-wise, but it works.
        To do so, we use any previously evaluated configs to bootstrap FLAML (i.e.,
        warm-start).
        For more info:
        https://microsoft.github.io/FLAML/docs/Use-Cases/Tune-User-Defined-Function#warm-start

        Returns
        -------
        result : dict
            The next (previously unseen) configuration to evaluate, as suggested by
            FLAML.

        Raises
        ------
        RuntimeError
            If FLAML did not suggest a previously unseen configuration.
        """

        from flaml import tune  # pylint: disable=import-outside-toplevel

        # Parse evaluated configs to the format used by FLAML
        points_to_evaluate: list = []
        evaluated_rewards: list = []
        if len(self.evaluated_samples) > 0:
            points_to_evaluate = [
                dict(normalize_config(self.optimizer_parameter_space, conf))
                for conf in self.evaluated_samples
            ]
            evaluated_rewards = [s.score for s in self.evaluated_samples.values()]

        # Warm-start the FLAML optimizer
        self._suggested_config = None
        tune.run(
            self._target_function,
            config=self.flaml_parameter_space,
            mode="min",
            metric=self._METRIC_NAME,
            points_to_evaluate=points_to_evaluate,
            evaluated_rewards=evaluated_rewards,
            num_samples=len(points_to_evaluate) + 1,
            low_cost_partial_config=self.low_cost_partial_config,
            verbose=0,
        )
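        # Note: num_samples budgets exactly one trial beyond the warm-start points,
        # so the run ends as soon as _target_function captures the first previously
        # unseen suggestion (by returning None).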
        if self._suggested_config is None:
            raise RuntimeError("FLAML did not produce a suggestion")

        return self._suggested_config  # type: ignore[unreachable]
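

# Minimal usage sketch for this wrapper: a single-objective ask-and-tell loop.
# It assumes the public BaseOptimizer API (`suggest()` / `register()`) mirrors
# the private `_suggest()` / `_register()` methods above, and that `flaml` is
# installed. The "x" parameter and the quadratic objective are illustrative only.
if __name__ == "__main__":
    space = ConfigSpace.ConfigurationSpace(seed=1234)
    space.add_hyperparameter(
        ConfigSpace.UniformFloatHyperparameter("x", lower=-5.0, upper=5.0)
    )

    optimizer = FlamlOptimizer(
        parameter_space=space,
        optimization_targets=["score"],
        seed=1234,
    )

    for _ in range(5):
        # Ask: a single-row dataframe whose columns are the parameter names.
        suggestion, _metadata = optimizer.suggest()
        x = float(suggestion["x"].iloc[0])
        # Tell: report the observed score for the suggested configuration.
        optimizer.register(
            configs=suggestion,
            scores=pd.DataFrame({"score": [x**2]}),
        )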