Coverage for mlos_core/mlos_core/optimizers/optimizer.py: 98%

119 statements  

coverage.py v7.6.10, created at 2025-01-21 01:50 +0000

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Contains the :py:class:`.BaseOptimizer` abstract class."""

import collections
from abc import ABCMeta, abstractmethod
from copy import deepcopy

import ConfigSpace
import numpy as np
import numpy.typing as npt
import pandas as pd

from mlos_core.data_classes import Observation, Observations, Suggestion
from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
from mlos_core.util import config_to_series


class BaseOptimizer(metaclass=ABCMeta):
    """Optimizer abstract base class defining the basic interface:
    :py:meth:`~.BaseOptimizer.suggest` and
    :py:meth:`~.BaseOptimizer.register`.
    """

    # pylint: disable=too-many-instance-attributes

    def __init__(
        self,
        *,
        parameter_space: ConfigSpace.ConfigurationSpace,
        optimization_targets: list[str],
        objective_weights: list[float] | None = None,
        space_adapter: BaseSpaceAdapter | None = None,
    ):
        """
        Create a new instance of the base optimizer.

        Parameters
        ----------
        parameter_space : ConfigSpace.ConfigurationSpace
            The parameter space to optimize.
        optimization_targets : list[str]
            The names of the optimization targets to minimize.
            To maximize a target, use the negative of the target when registering scores.
        objective_weights : list[float] | None
            Optional list of weights of optimization targets.
        space_adapter : BaseSpaceAdapter | None
            The space adapter to employ for parameter space transformations.
        """
        self.parameter_space: ConfigSpace.ConfigurationSpace = parameter_space
        """The parameter space to optimize."""

        self.optimizer_parameter_space: ConfigSpace.ConfigurationSpace = (
            parameter_space if space_adapter is None else space_adapter.target_parameter_space
        )
        """
        The parameter space actually used by the optimizer.

        (in case a :py:mod:`SpaceAdapter <mlos_core.spaces.adapters>` is used)
        """

        if space_adapter is not None and space_adapter.orig_parameter_space != parameter_space:
            raise ValueError("Given parameter space differs from the one given to space adapter")

        self._optimization_targets = optimization_targets
        self._objective_weights = objective_weights
        if objective_weights is not None and len(objective_weights) != len(optimization_targets):
            raise ValueError("Number of weights must match the number of optimization targets")

        self._space_adapter: BaseSpaceAdapter | None = space_adapter
        self._observations: Observations = Observations()
        self._has_context: bool | None = None
        self._pending_observations: list[tuple[pd.DataFrame, pd.DataFrame | None]] = []

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(space_adapter={self.space_adapter})"

    @property
    def space_adapter(self) -> BaseSpaceAdapter | None:
        """Get the space adapter instance (if any)."""
        return self._space_adapter

    def register(
        self,
        observations: Observation | Observations,
    ) -> None:
        """
        Register all observations at once, given either as a single Observation
        or as a collection of Observations.

        Parameters
        ----------
        observations : Observation | Observations
            The observation(s) to register.
        """
        if isinstance(observations, Observation):
            observations = Observations(observations=[observations])
        # Check input and transform the observations if a space adapter is present.
        observations = Observations(
            observations=[
                self._preprocess_observation(observation) for observation in observations
            ]
        )
        # Now bulk register all observations (details delegated to the underlying classes).
        self._register(observations)
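
    # Illustrative: several completed trials can also be registered in one call
    # by wrapping them in an Observations collection (obs_1..obs_3 hypothetical):
    #
    #   optimizer.register(
    #       observations=Observations(observations=[obs_1, obs_2, obs_3])
    #   )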

    def _preprocess_observation(self, observation: Observation) -> Observation:
        """
        Wrapper method that employs the space adapter (if any) and performs input
        validation before registering the configs and scores.

        Parameters
        ----------
        observation : Observation
            The observation to register.

        Returns
        -------
        observation : Observation
            The (possibly transformed) observation to register.
        """
        # Do some input validation.
        assert observation.metadata is None or isinstance(observation.metadata, pd.Series)
        assert set(observation.score.index) == set(
            self._optimization_targets
        ), "Mismatched optimization targets."
        assert self._has_context is None or self._has_context ^ (
            observation.context is None
        ), "Context must always be added or never be added."
        assert len(observation.config) == len(
            self.parameter_space.values()
        ), "Mismatched configuration shape."

        self._has_context = observation.context is not None
        self._observations.append(observation)

        transformed_observation = deepcopy(observation)  # Needed to support named tuples
        if self._space_adapter:
            transformed_observation = Observation(
                config=self._space_adapter.inverse_transform(transformed_observation.config),
                score=transformed_observation.score,
                context=transformed_observation.context,
                metadata=transformed_observation.metadata,
            )
            assert len(transformed_observation.config) == len(
                self.optimizer_parameter_space.values()
            ), "Mismatched configuration shape after inverse transform."
        return transformed_observation

    @abstractmethod
    def _register(
        self,
        observations: Observations,
    ) -> None:
        """
        Registers the given configs and scores.

        Parameters
        ----------
        observations : Observations
            The set of observations to register.
        """
        pass  # pylint: disable=unnecessary-pass  # pragma: no cover

    def suggest(
        self,
        *,
        context: pd.Series | None = None,
        defaults: bool = False,
    ) -> Suggestion:
        """
        Wrapper method that employs the space adapter (if any) after suggesting a
        new configuration.

        Parameters
        ----------
        context : pandas.Series
            Not Yet Implemented.
        defaults : bool
            Whether to return the default config instead of an optimizer guided one.
            By default, use the one from the optimizer.

        Returns
        -------
        suggestion : Suggestion
            The suggested point to evaluate.
        """
        if defaults:
            configuration = config_to_series(self.parameter_space.get_default_configuration())
            if self.space_adapter is not None:
                configuration = self.space_adapter.inverse_transform(configuration)
            suggestion = Suggestion(config=configuration, context=context, metadata=None)
        else:
            suggestion = self._suggest(context=context)
            assert set(suggestion.config.index).issubset(set(self.optimizer_parameter_space)), (
                "Optimizer suggested a configuration that does "
                "not match the expected parameter space."
            )
        if self._space_adapter:
            suggestion = Suggestion(
                config=self._space_adapter.transform(suggestion.config),
                context=suggestion.context,
                metadata=suggestion.metadata,
            )
            assert set(suggestion.config.index).issubset(set(self.parameter_space)), (
                "Space adapter produced a configuration that does "
                "not match the expected parameter space."
            )
        return suggestion
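
    # Illustrative: seeding a run with the space's default configuration before
    # switching to optimizer-guided suggestions:
    #
    #   default_suggestion = optimizer.suggest(defaults=True)
    #   # ...evaluate and register it, then...
    #   next_suggestion = optimizer.suggest()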

    @abstractmethod
    def _suggest(
        self,
        *,
        context: pd.Series | None = None,
    ) -> Suggestion:
        """
        Suggests a new configuration.

        Parameters
        ----------
        context : pandas.Series
            Not Yet Implemented.

        Returns
        -------
        suggestion : Suggestion
            The suggestion to evaluate.
        """
        pass  # pylint: disable=unnecessary-pass  # pragma: no cover

    @abstractmethod
    def register_pending(self, pending: Suggestion) -> None:
        """
        Registers the given suggestion as "pending". That is to say, it has been
        suggested by the optimizer, and an experiment trial has been started. This
        can be useful for executing multiple trials in parallel, retry logic, etc.

        Parameters
        ----------
        pending : Suggestion
            The pending suggestion to register.
        """
        pass  # pylint: disable=unnecessary-pass  # pragma: no cover
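
    # Illustrative: marking suggestions as pending while several trials run in
    # parallel (`launch_trial` is a hypothetical user-supplied function):
    #
    #   suggestions = [optimizer.suggest() for _ in range(4)]
    #   for suggestion in suggestions:
    #       optimizer.register_pending(pending=suggestion)
    #       launch_trial(suggestion)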

    def get_observations(self) -> Observations:
        """
        Returns all the observations registered so far as an
        :py:class:`.Observations` collection of (config, score, context) data.

        Returns
        -------
        observations : Observations
            All the observations registered so far.
        """
        if len(self._observations) == 0:
            raise ValueError("No observations registered yet.")
        return self._observations

    def get_best_observations(
        self,
        n_max: int = 1,
    ) -> Observations:
        """
        Get the N best observations so far as a filtered version of Observations.
        Default is N=1. The results are sorted in ASCENDING order of the
        optimization targets. Uses the `pandas.DataFrame.nsmallest(..., keep="first")`
        method under the hood.

        Parameters
        ----------
        n_max : int
            Maximum number of best observations to return. Default is 1.

        Returns
        -------
        observations : Observations
            A filtered version of Observations with the best N observations.
        """
        observations = self.get_observations()
        if len(observations) == 0:
            raise ValueError("No observations registered yet.")

        idx = observations.scores.nsmallest(
            n_max,
            columns=self._optimization_targets,
            keep="first",
        ).index
        return observations.filter_by_index(idx)
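
    # Illustrative: fetching the three best observations after a run, assuming a
    # single minimized target named "objective":
    #
    #   best = optimizer.get_best_observations(n_max=3)
    #   best.configs  # the three lowest-scoring configurations
    #   best.scores   # their corresponding "objective" values, ascending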

    def cleanup(self) -> None:
        """
        Remove temp files, release resources, etc. after use.

        Default is a no-op. Redefine this method in optimizers that require cleanup.
        """
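
    # Illustrative: a hypothetical subclass that holds on-disk state might
    # override cleanup() like so:
    #
    #   class TempDirOptimizer(BaseOptimizer):
    #       def cleanup(self) -> None:
    #           shutil.rmtree(self._tmp_dir, ignore_errors=True)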

    def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame:
        """Convert numpy array from one-hot encoding to a DataFrame with categoricals
        and ints in proper columns.
        """
        df_dict = collections.defaultdict(list)
        for i in range(config.shape[0]):
            j = 0
            for param in self.optimizer_parameter_space.values():
                if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                    for offset, val in enumerate(param.choices):
                        if config[i][j + offset] == 1:
                            df_dict[param.name].append(val)
                            break
                    j += len(param.choices)
                else:
                    val = config[i][j]
                    if isinstance(param, ConfigSpace.UniformIntegerHyperparameter):
                        val = int(val)
                    df_dict[param.name].append(val)
                    j += 1
        return pd.DataFrame(df_dict)
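
    # Worked example of the one-hot layout (illustrative): for a space with a
    # categorical `kernel` over ("linear", "poly", "rbf") followed by an integer
    # `n`, each encoded row has 3 + 1 = 4 columns:
    #
    #   [1.0, 0.0, 0.0, 8.0]  ->  {"kernel": "linear", "n": 8}
    #   [0.0, 0.0, 1.0, 3.0]  ->  {"kernel": "rbf", "n": 3}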

    def _to_1hot(self, config: pd.DataFrame | pd.Series) -> npt.NDArray:
        """Convert a pandas DataFrame or Series to a one-hot-encoded numpy array."""
        n_cols = 0
        n_rows = config.shape[0] if config.ndim > 1 else 1
        for param in self.optimizer_parameter_space.values():
            if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                n_cols += len(param.choices)
            else:
                n_cols += 1
        one_hot = np.zeros((n_rows, n_cols), dtype=np.float32)
        for i in range(n_rows):
            j = 0
            for param in self.optimizer_parameter_space.values():
                if config.ndim > 1:
                    assert isinstance(config, pd.DataFrame)
                    col = config.columns.get_loc(param.name)
                    assert isinstance(col, int)
                    val = config.iloc[i, col]
                else:
                    assert isinstance(config, pd.Series)
                    col = config.index.get_loc(param.name)
                    assert isinstance(col, int)
                    val = config.iloc[col]
                if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                    offset = param.choices.index(val)
                    one_hot[i][j + offset] = 1
                    j += len(param.choices)
                else:
                    one_hot[i][j] = val
                    j += 1
        return one_hot
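
# A minimal round-trip sketch (illustrative): `optimizer` is assumed to be an
# instance of a concrete BaseOptimizer subclass over the space sketched above.
#
#   encoded = optimizer._to_1hot(optimizer.suggest().config)  # shape (1, n_cols)
#   decoded = optimizer._from_1hot(encoded)  # single-row DataFrame of parameters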