Coverage for mlos_core/mlos_core/optimizers/optimizer.py: 99%

117 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-10-07 01:52 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5"""Contains the BaseOptimizer abstract class.""" 

6 

7import collections 

8from abc import ABCMeta, abstractmethod 

9from typing import List, Optional, Tuple, Union 

10 

11import ConfigSpace 

12import numpy as np 

13import numpy.typing as npt 

14import pandas as pd 

15 

16from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter 

17from mlos_core.util import config_to_dataframe 

18 

19 

class BaseOptimizer(metaclass=ABCMeta):
    """
    Optimizer abstract base class defining the basic interface.

    Concrete subclasses implement :meth:`_register`, :meth:`_suggest`, and
    :meth:`register_pending` against a specific backend optimizer, while this
    base class handles space-adapter transformations and observation tracking.
    """

    # pylint: disable=too-many-instance-attributes

25 def __init__( 

26 self, 

27 *, 

28 parameter_space: ConfigSpace.ConfigurationSpace, 

29 optimization_targets: List[str], 

30 objective_weights: Optional[List[float]] = None, 

31 space_adapter: Optional[BaseSpaceAdapter] = None, 

32 ): 

33 """ 

34 Create a new instance of the base optimizer. 

35 

36 Parameters 

37 ---------- 

38 parameter_space : ConfigSpace.ConfigurationSpace 

39 The parameter space to optimize. 

40 optimization_targets : List[str] 

41 The names of the optimization targets to minimize. 

42 objective_weights : Optional[List[float]] 

43 Optional list of weights of optimization targets. 

44 space_adapter : BaseSpaceAdapter 

45 The space adapter class to employ for parameter space transformations. 

46 """ 

47 self.parameter_space: ConfigSpace.ConfigurationSpace = parameter_space 

48 self.optimizer_parameter_space: ConfigSpace.ConfigurationSpace = ( 

49 parameter_space if space_adapter is None else space_adapter.target_parameter_space 

50 ) 

51 

52 if space_adapter is not None and space_adapter.orig_parameter_space != parameter_space: 

53 raise ValueError("Given parameter space differs from the one given to space adapter") 

54 

55 self._optimization_targets = optimization_targets 

56 self._objective_weights = objective_weights 

57 if objective_weights is not None and len(objective_weights) != len(optimization_targets): 

58 raise ValueError("Number of weights must match the number of optimization targets") 

59 

60 self._space_adapter: Optional[BaseSpaceAdapter] = space_adapter 

61 self._observations: List[Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]] = [] 

62 self._has_context: Optional[bool] = None 

63 self._pending_observations: List[Tuple[pd.DataFrame, Optional[pd.DataFrame]]] = [] 

64 

65 def __repr__(self) -> str: 

66 return f"{self.__class__.__name__}(space_adapter={self.space_adapter})" 

67 

    @property
    def space_adapter(self) -> Optional[BaseSpaceAdapter]:
        """Get the space adapter instance (if any)."""
        return self._space_adapter

72 

73 def register( 

74 self, 

75 *, 

76 configs: pd.DataFrame, 

77 scores: pd.DataFrame, 

78 context: Optional[pd.DataFrame] = None, 

79 metadata: Optional[pd.DataFrame] = None, 

80 ) -> None: 

81 """ 

82 Wrapper method, which employs the space adapter (if any), before registering the 

83 configs and scores. 

84 

85 Parameters 

86 ---------- 

87 configs : pd.DataFrame 

88 Dataframe of configs / parameters. The columns are parameter names and 

89 the rows are the configs. 

90 scores : pd.DataFrame 

91 Scores from running the configs. The index is the same as the index of the configs. 

92 

93 context : pd.DataFrame 

94 Not Yet Implemented. 

95 

96 metadata : Optional[pd.DataFrame] 

97 Metadata returned by the backend optimizer's suggest method. 

98 """ 

99 # Do some input validation. 

100 assert metadata is None or isinstance(metadata, pd.DataFrame) 

101 assert set(scores.columns) == set( 

102 self._optimization_targets 

103 ), "Mismatched optimization targets." 

104 assert self._has_context is None or self._has_context ^ ( 

105 context is None 

106 ), "Context must always be added or never be added." 

107 assert len(configs) == len(scores), "Mismatched number of configs and scores." 

108 if context is not None: 

109 assert len(configs) == len(context), "Mismatched number of configs and context." 

110 assert configs.shape[1] == len( 

111 self.parameter_space.values() 

112 ), "Mismatched configuration shape." 

113 self._observations.append((configs, scores, context)) 

114 self._has_context = context is not None 

115 

116 if self._space_adapter: 

117 configs = self._space_adapter.inverse_transform(configs) 

118 assert configs.shape[1] == len( 

119 self.optimizer_parameter_space.values() 

120 ), "Mismatched configuration shape after inverse transform." 

121 return self._register(configs=configs, scores=scores, context=context) 

122 

    @abstractmethod
    def _register(
        self,
        *,
        configs: pd.DataFrame,
        scores: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
    ) -> None:
        """
        Registers the given configs and scores.

        Parameters
        ----------
        configs : pd.DataFrame
            Dataframe of configs / parameters. The columns are parameter names and
            the rows are the configs.
        scores : pd.DataFrame
            Scores from running the configs. The index is the same as the index of the configs.

        context : pd.DataFrame
            Not Yet Implemented.

        metadata : Optional[pd.DataFrame]
            Metadata returned by the backend optimizer's suggest method.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

147 

148 def suggest( 

149 self, 

150 *, 

151 context: Optional[pd.DataFrame] = None, 

152 defaults: bool = False, 

153 ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: 

154 """ 

155 Wrapper method, which employs the space adapter (if any), after suggesting a new 

156 configuration. 

157 

158 Parameters 

159 ---------- 

160 context : pd.DataFrame 

161 Not Yet Implemented. 

162 defaults : bool 

163 Whether or not to return the default config instead of an optimizer guided one. 

164 By default, use the one from the optimizer. 

165 

166 Returns 

167 ------- 

168 configuration : pd.DataFrame 

169 Pandas dataframe with a single row. Column names are the parameter names. 

170 

171 metadata : Optional[pd.DataFrame] 

172 The metadata associated with the given configuration used for evaluations. 

173 Backend optimizer specific. 

174 """ 

175 if defaults: 

176 configuration = config_to_dataframe(self.parameter_space.get_default_configuration()) 

177 metadata = None 

178 if self.space_adapter is not None: 

179 configuration = self.space_adapter.inverse_transform(configuration) 

180 else: 

181 configuration, metadata = self._suggest(context=context) 

182 assert len(configuration) == 1, "Suggest must return a single configuration." 

183 assert set(configuration.columns).issubset(set(self.optimizer_parameter_space)), ( 

184 "Optimizer suggested a configuration that does " 

185 "not match the expected parameter space." 

186 ) 

187 if self._space_adapter: 

188 configuration = self._space_adapter.transform(configuration) 

189 assert set(configuration.columns).issubset(set(self.parameter_space)), ( 

190 "Space adapter produced a configuration that does " 

191 "not match the expected parameter space." 

192 ) 

193 return configuration, metadata 

194 

    @abstractmethod
    def _suggest(
        self,
        *,
        context: Optional[pd.DataFrame] = None,
    ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
        """
        Suggests a new configuration.

        Parameters
        ----------
        context : pd.DataFrame
            Not Yet Implemented.

        Returns
        -------
        configuration : pd.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.

        metadata : Optional[pd.DataFrame]
            The metadata associated with the given configuration used for evaluations.
            Backend optimizer specific.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

219 

    @abstractmethod
    def register_pending(
        self,
        *,
        configs: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
    ) -> None:
        """
        Registers the given configs as "pending". That is it say, it has been suggested
        by the optimizer, and an experiment trial has been started. This can be useful
        for executing multiple trials in parallel, retry logic, etc.

        Parameters
        ----------
        configs : pd.DataFrame
            Dataframe of configs / parameters. The columns are parameter names and
            the rows are the configs.
        context : pd.DataFrame
            Not Yet Implemented.
        metadata : Optional[pd.DataFrame]
            Metadata returned by the backend optimizer's suggest method.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

244 

245 def get_observations(self) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]: 

246 """ 

247 Returns the observations as a triplet of DataFrames (config, score, context). 

248 

249 Returns 

250 ------- 

251 observations : Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]] 

252 A triplet of (config, score, context) DataFrames of observations. 

253 """ 

254 if len(self._observations) == 0: 

255 raise ValueError("No observations registered yet.") 

256 configs = pd.concat([config for config, _, _ in self._observations]).reset_index(drop=True) 

257 scores = pd.concat([score for _, score, _ in self._observations]).reset_index(drop=True) 

258 contexts = pd.concat( 

259 [ 

260 pd.DataFrame() if context is None else context 

261 for _, _, context in self._observations 

262 ] 

263 ).reset_index(drop=True) 

264 return (configs, scores, contexts if len(contexts.columns) > 0 else None) 

265 

266 def get_best_observations( 

267 self, 

268 *, 

269 n_max: int = 1, 

270 ) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]: 

271 """ 

272 Get the N best observations so far as a triplet of DataFrames (config, score, 

273 context). Default is N=1. The columns are ordered in ASCENDING order of the 

274 optimization targets. The function uses `pandas.DataFrame.nsmallest(..., 

275 keep="first")` method under the hood. 

276 

277 Parameters 

278 ---------- 

279 n_max : int 

280 Maximum number of best observations to return. Default is 1. 

281 

282 Returns 

283 ------- 

284 observations : Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]] 

285 A triplet of best (config, score, context) DataFrames of best observations. 

286 """ 

287 if len(self._observations) == 0: 

288 raise ValueError("No observations registered yet.") 

289 (configs, scores, contexts) = self.get_observations() 

290 idx = scores.nsmallest(n_max, columns=self._optimization_targets, keep="first").index 

291 return (configs.loc[idx], scores.loc[idx], None if contexts is None else contexts.loc[idx]) 

292 

    def cleanup(self) -> None:
        """
        Remove temp files, release resources, etc. after use.

        Default is a no-op. Redefine this method in optimizers that require cleanup.
        """

300 

301 def _from_1hot(self, *, config: npt.NDArray) -> pd.DataFrame: 

302 """Convert numpy array from one-hot encoding to a DataFrame with categoricals 

303 and ints in proper columns. 

304 """ 

305 df_dict = collections.defaultdict(list) 

306 for i in range(config.shape[0]): 

307 j = 0 

308 for param in self.optimizer_parameter_space.values(): 

309 if isinstance(param, ConfigSpace.CategoricalHyperparameter): 

310 for offset, val in enumerate(param.choices): 

311 if config[i][j + offset] == 1: 

312 df_dict[param.name].append(val) 

313 break 

314 j += len(param.choices) 

315 else: 

316 val = config[i][j] 

317 if isinstance(param, ConfigSpace.UniformIntegerHyperparameter): 

318 val = int(val) 

319 df_dict[param.name].append(val) 

320 j += 1 

321 return pd.DataFrame(df_dict) 

322 

323 def _to_1hot(self, *, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray: 

324 """Convert pandas DataFrame to one-hot-encoded numpy array.""" 

325 n_cols = 0 

326 n_rows = config.shape[0] if config.ndim > 1 else 1 

327 for param in self.optimizer_parameter_space.values(): 

328 if isinstance(param, ConfigSpace.CategoricalHyperparameter): 

329 n_cols += len(param.choices) 

330 else: 

331 n_cols += 1 

332 one_hot = np.zeros((n_rows, n_cols), dtype=np.float32) 

333 for i in range(n_rows): 

334 j = 0 

335 for param in self.optimizer_parameter_space.values(): 

336 if config.ndim > 1: 

337 assert isinstance(config, pd.DataFrame) 

338 col = config.columns.get_loc(param.name) 

339 assert isinstance(col, int) 

340 val = config.iloc[i, col] 

341 else: 

342 assert isinstance(config, pd.Series) 

343 col = config.index.get_loc(param.name) 

344 assert isinstance(col, int) 

345 val = config.iloc[col] 

346 if isinstance(param, ConfigSpace.CategoricalHyperparameter): 

347 offset = param.choices.index(val) 

348 one_hot[i][j + offset] = 1 

349 j += len(param.choices) 

350 else: 

351 one_hot[i][j] = val 

352 j += 1 

353 return one_hot