Coverage for mlos_core/mlos_core/spaces/adapters/llamatune.py: 95%

177 statements  

« prev     ^ index     » next       coverage.py v7.6.9, created at 2024-12-14 01:58 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5""" 

6Implementation of LlamaTune space adapter. 

7 

8LlamaTune is a technique that transforms the original parameter space into a 

9lower-dimensional space to try and improve the sample efficiency of the underlying 

10optimizer by making use of the inherent parameter sensitivity correlations in most 

11systems. 

12 

13See Also: `LlamaTune: Sample-Efficient DBMS Configuration Tuning 

14<https://www.microsoft.com/en-us/research/publication/llamatune-sample-efficient-dbms-configuration-tuning>`_. 

15""" 

16import os 

17from typing import Dict, List, Optional, Union 

18from warnings import warn 

19 

20import ConfigSpace 

21import ConfigSpace.exceptions 

22import numpy as np 

23import numpy.typing as npt 

24import pandas as pd 

25from ConfigSpace.hyperparameters import NumericalHyperparameter 

26from sklearn.preprocessing import MinMaxScaler 

27 

28from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter 

29from mlos_core.util import normalize_config 

30 

31 

class LlamaTuneAdapter(BaseSpaceAdapter):  # pylint: disable=too-many-instance-attributes
    """Implementation of LlamaTune, a set of parameter space transformation techniques,
    aimed at improving the sample-efficiency of the underlying optimizer.

    The adapter exposes a low-dimensional target space to the optimizer and maps
    each suggested low-dimensional point onto the original parameter space using
    a random projection (see :py:meth:`_transform`).
    """

    DEFAULT_NUM_LOW_DIMS = 16
    """Default number of dimensions in the low-dimensional search space, generated by
    HeSBO projection.
    """

    DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE = 0.2
    """Default percentage of bias for each special parameter value."""

    DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM = 10000
    """Default number of (max) unique values of each parameter, when space
    discretization is used.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        orig_parameter_space: ConfigSpace.ConfigurationSpace,
        num_low_dims: int = DEFAULT_NUM_LOW_DIMS,
        special_param_values: Optional[dict] = None,
        max_unique_values_per_param: Optional[int] = DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM,
        use_approximate_reverse_mapping: bool = False,
    ):
        """
        Create a space adapter that employs LlamaTune's techniques.

        Parameters
        ----------
        orig_parameter_space : ConfigSpace.ConfigurationSpace
            The original (user-provided) parameter space to optimize.
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.
            Must be strictly smaller than the number of original parameters.
        special_param_values : Optional[dict]
            Dictionary of special parameter values, keyed by parameter name.
            Each dict value is either an int, an ``(int, float)`` tuple
            (special value, biasing percentage), a list of ints, or a list of
            ``(int, float)`` tuples. ``None`` (or an empty dict) disables
            special value biasing.
        max_unique_values_per_param : Optional[int]
            Number of unique values per parameter. Used to discretize the parameter space.
            If `None` space discretization is disabled.
        use_approximate_reverse_mapping : bool
            Whether configurations that were *not* previously suggested by the
            optimizer may be inverse-transformed using an approximate
            (pseudo-inverse based) reverse mapping. When disabled, such
            configurations (other than the default one) are rejected.

        Raises
        ------
        ValueError
            If `num_low_dims` is not smaller than the number of original
            parameters, or if `special_param_values` is invalid.
        NotImplementedError
            If a special value is defined for a non-integer parameter.
        """
        super().__init__(orig_parameter_space=orig_parameter_space)

        if num_low_dims >= len(orig_parameter_space):
            raise ValueError(
                "Number of target config space dimensions should be "
                "less than those of original config space."
            )

        # Validate input special param values dict
        special_param_values = special_param_values or {}
        self._validate_special_param_values(special_param_values)

        # Create low-dimensional parameter search space
        self._construct_low_dim_space(num_low_dims, max_unique_values_per_param)

        # Number of dimensions of the original space (computed once, used below).
        num_orig_dims = len(list(self.orig_parameter_space.values()))

        # Initialize config values scaler: from (-1, 1) to (0, 1) range
        config_scaler = MinMaxScaler(feature_range=(0, 1))
        ones_vector = np.ones(num_orig_dims)
        config_scaler.fit(np.array([-ones_vector, ones_vector]))
        self._config_scaler = config_scaler

        # Generate random mapping from low-dimensional space to original config space:
        # each original dimension is assigned one low dimension and one sign.
        self._h_matrix = self._random_state.choice(range(num_low_dims), num_orig_dims)
        self._sigma_vector = self._random_state.choice([-1, 1], num_orig_dims)

        # Used to retrieve the low-dim point, given the high-dim one
        self._suggested_configs: Dict[ConfigSpace.Configuration, ConfigSpace.Configuration] = {}
        # Declared only; it is lazily computed the first time an approximate
        # reverse mapping is requested (see `_try_inverse_transform_config`).
        self._pinv_matrix: npt.NDArray
        self._use_approximate_reverse_mapping = use_approximate_reverse_mapping

    @property
    def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace:
        """Get the parameter space, which is explored by the underlying optimizer."""
        return self._target_config_space

    def inverse_transform(self, configuration: pd.Series) -> pd.Series:
        """
        Map a configuration of the original space back to the low-dimensional
        target space.

        Parameters
        ----------
        configuration : pd.Series
            Configuration in the original parameter space. Missing (NaN) entries
            are dropped before the configuration is constructed.

        Returns
        -------
        pd.Series
            Corresponding configuration in the target parameter space, indexed
            by the target parameter names.

        Raises
        ------
        ValueError
            If the configuration was not previously suggested by the optimizer,
            is not the default configuration, and approximate reverse mapping
            is disabled.
        """
        config = ConfigSpace.Configuration(
            self.orig_parameter_space,
            values=configuration.dropna().to_dict(),
        )

        target_config = self._suggested_configs.get(config, None)
        # NOTE: HeSBO is a non-linear projection method, and does not inherently
        # support inverse projection
        # To (partly) support this operation, we keep track of the suggested
        # low-dim point(s) along with the respective high-dim point; this way we
        # can retrieve the low-dim point, from its high-dim counterpart.
        if target_config is None:
            # Inherently it is not supported to register points, which were not
            # suggested by the optimizer. The default configuration is the one
            # exception: it should always be registerable.
            is_default = config == self.orig_parameter_space.get_default_configuration()
            if not is_default and not self._use_approximate_reverse_mapping:
                raise ValueError(
                    f"{repr(config)}\n"
                    "The above configuration was not suggested by the optimizer. "
                    "Approximate reverse mapping is currently disabled; "
                    "thus *only* configurations suggested "
                    "previously by the optimizer can be registered."
                )

            target_config = self._try_inverse_transform_config(config)

        return pd.Series(target_config, index=list(self.target_parameter_space.keys()))

    def _try_inverse_transform_config(
        self,
        config: ConfigSpace.Configuration,
    ) -> ConfigSpace.Configuration:
        """
        Attempts to generate an inverse mapping of the given configuration that wasn't
        previously registered.

        Parameters
        ----------
        config : ConfigSpace.Configuration
            Configuration in the original high-dimensional space.

        Returns
        -------
        ConfigSpace.Configuration
            Configuration in the low-dimensional space.

        Raises
        ------
        ValueError
            On conversion errors.
        """
        # ...yet, we try to support that by implementing an approximate
        # reverse mapping using pseudo-inverse matrix.
        if getattr(self, "_pinv_matrix", None) is None:
            self._try_generate_approx_inverse_mapping()

        # Replace NaNs with zeros for inactive hyperparameters
        config_vector = np.nan_to_num(config.get_array(), nan=0.0)
        # Perform approximate reverse mapping
        # NOTE: applying special value biasing is not possible
        vector: npt.NDArray = self._config_scaler.inverse_transform(np.array([config_vector]))[0]
        # Clip values to the [-1, 1] range of the low dimensional space.
        target_config_vector: npt.NDArray = np.clip(self._pinv_matrix.dot(vector), -1, 1)
        if self._q_scaler is not None:
            # If the max_unique_values_per_param is set, we need to scale
            # the low dimension space back to the discretized space as well.
            target_config_vector = self._q_scaler.inverse_transform(
                np.array([target_config_vector])
            )[0]
            assert isinstance(target_config_vector, np.ndarray)
            # Clip values to [1, max_value] range (floating point errors may occur),
            # then truncate to integers.
            target_config_vector = np.clip(
                target_config_vector,
                1,
                self._q_scaler.data_max_,
            ).astype(int)
        # Convert the vector to a dictionary.
        target_config_dict = dict(
            zip(
                self.target_parameter_space.keys(),
                target_config_vector,
            )
        )
        target_config = ConfigSpace.Configuration(
            self.target_parameter_space,
            values=target_config_dict,
            # This method results in hyperparameter type conversion issues
            # (e.g., float instead of int), so we use the values dict instead.
            # vector=target_config_vector,
        )

        # Check to see if the approximate reverse mapping looks OK.
        # Note: we know this isn't 100% accurate, so this is just a warning and
        # mostly meant for internal debugging.
        configuration_dict = dict(config)
        double_checked_config = self._transform(dict(target_config))
        double_checked_config = {
            # Skip the special values that aren't in the original space.
            k: v
            for k, v in double_checked_config.items()
            if k in configuration_dict
        }
        if double_checked_config != configuration_dict and (
            os.environ.get("MLOS_DEBUG", "false").lower() in {"1", "true", "y", "yes"}
        ):
            warn(
                (
                    f"Note: Configuration {configuration_dict} was inverse transformed to "
                    f"{dict(target_config)} and then back to {double_checked_config}. "
                    "This is an approximate reverse mapping for previously unregistered "
                    "configurations, so this is just a warning."
                ),
                UserWarning,
            )

        # But the inverse mapping should at least be valid in the target space.
        try:
            ConfigSpace.Configuration(
                self.target_parameter_space,
                values=target_config,
            ).check_valid_configuration()
        except ConfigSpace.exceptions.IllegalValueError as err:
            raise ValueError(
                f"Invalid configuration {target_config} generated by "
                f"inverse mapping of {config}:\n{err}"
            ) from err

        return target_config

    def transform(self, configuration: pd.Series) -> pd.Series:
        """
        Project a configuration of the low-dimensional target space onto the
        original parameter space, and remember the pair for later inverse lookups.

        Parameters
        ----------
        configuration : pd.Series
            Configuration in the target (low-dimensional) parameter space.

        Returns
        -------
        pd.Series
            Corresponding configuration in the original parameter space.

        Raises
        ------
        ValueError
            If the projected configuration is not valid in the original space.
        """
        target_values_dict = configuration.to_dict()
        target_configuration = ConfigSpace.Configuration(
            self.target_parameter_space,
            values=target_values_dict,
        )

        orig_values_dict = self._transform(target_values_dict)
        orig_configuration = normalize_config(self.orig_parameter_space, orig_values_dict)

        # Validate that the configuration is in the original space.
        try:
            ConfigSpace.Configuration(
                self.orig_parameter_space,
                values=orig_configuration,
            ).check_valid_configuration()
        except ConfigSpace.exceptions.IllegalValueError as err:
            raise ValueError(
                f"Invalid configuration {orig_configuration} generated by "
                f"transformation of {target_configuration}:\n{err}"
            ) from err

        # Add to inverse dictionary -- needed for registering the performance later
        self._suggested_configs[orig_configuration] = target_configuration

        ret: pd.Series = pd.Series(
            list(orig_configuration.values()), index=list(orig_configuration.keys())
        )
        return ret

    def _construct_low_dim_space(
        self,
        num_low_dims: int,
        max_unique_values_per_param: Optional[int],
    ) -> None:
        """
        Constructs the low-dimensional parameter (potentially discretized) search space.

        Sets the ``_q_scaler`` and ``_target_config_space`` attributes.

        Parameters
        ----------
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.

        max_unique_values_per_param : Optional[int]
            Number of unique values per parameter. Used to discretize the parameter space.
            If `None` space discretization is disabled.
        """
        # Define target space parameters
        q_scaler = None
        hyperparameters: List[
            Union[ConfigSpace.UniformFloatHyperparameter, ConfigSpace.UniformIntegerHyperparameter]
        ]
        if max_unique_values_per_param is None:
            hyperparameters = [
                ConfigSpace.UniformFloatHyperparameter(name=f"dim_{idx}", lower=-1, upper=1)
                for idx in range(num_low_dims)
            ]
        else:
            # Currently supported optimizers do not support defining a discretized
            # space (like ConfigSpace does using `q` kwarg).
            # Thus, to support space discretization, we define the low-dimensional
            # space using integer hyperparameters.
            # We also employ a scaler, which scales suggested values to [-1, 1]
            # range, used by HeSBO projection.
            hyperparameters = [
                ConfigSpace.UniformIntegerHyperparameter(
                    name=f"dim_{idx}",
                    lower=1,
                    upper=max_unique_values_per_param,
                )
                for idx in range(num_low_dims)
            ]

            # Initialize quantized values scaler:
            # from [1, max_unique_values_per_param] to [-1, 1] range
            q_scaler = MinMaxScaler(feature_range=(-1, 1))
            ones_vector = np.ones(num_low_dims)
            max_value_vector = ones_vector * max_unique_values_per_param
            q_scaler.fit(np.array([ones_vector, max_value_vector]))

        self._q_scaler = q_scaler

        # Construct low-dimensional parameter search space
        config_space = ConfigSpace.ConfigurationSpace(name=self.orig_parameter_space.name)
        # use same random state as in original parameter space
        config_space.random = self._random_state
        config_space.add(hyperparameters)
        self._target_config_space = config_space

    def _transform(self, configuration: dict) -> dict:
        """
        Projects a low-dimensional point (configuration) to the high-dimensional
        original parameter space, and then biases the resulting parameter values towards
        their special value(s) (if any).

        Parameters
        ----------
        configuration : dict
            Configuration in the low-dimensional space.

        Returns
        -------
        configuration : dict
            Projected configuration in the high-dimensional original search space.

        Raises
        ------
        NotImplementedError
            If the original space contains a hyperparameter that is neither
            categorical nor numerical.
        """
        original_parameters = list(self.orig_parameter_space.values())
        low_dim_config_values = list(configuration.values())

        if self._q_scaler is not None:
            # Scale parameter values from [1, max_value] to [-1, 1]
            low_dim_config_values = self._q_scaler.transform(np.array([low_dim_config_values]))[0]

        # Project low-dim point to original parameter space: every original
        # dimension takes the (signed) value of its assigned low dimension.
        original_config_values = [
            sigma * low_dim_config_values[low_dim_idx]
            for sigma, low_dim_idx in zip(self._sigma_vector, self._h_matrix)
        ]
        # Scale parameter values to [0, 1]
        original_config_values = self._config_scaler.transform(
            np.array([original_config_values])
        )[0]

        original_config = {}
        for param, norm_value in zip(original_parameters, original_config_values):
            # Clip value to force it to fall in [0, 1]
            # NOTE: HeSBO projection ensures that theoretically but due to
            #       floating point ops nuances this is not always guaranteed
            value = np.clip(norm_value, 0, 1)

            if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                index = int(value * len(param.choices))  # truncate integer part
                index = max(0, min(len(param.choices) - 1, index))
                # NOTE: potential rounding here would be unfair to first & last values
                orig_value = param.choices[index]
            elif isinstance(param, NumericalHyperparameter):
                if param.name in self._special_param_values_dict:
                    value = self._special_param_value_scaler(param, value)

                orig_value = param.to_value(value)
                orig_value = np.clip(orig_value, param.lower, param.upper)
            else:
                raise NotImplementedError(
                    "Only Categorical, Integer, and Float hyperparameters are currently supported."
                )

            original_config[param.name] = orig_value

        return original_config

    def _special_param_value_scaler(
        self,
        param: NumericalHyperparameter,
        input_value: float,
    ) -> float:
        """
        Biases the special value(s) of this parameter, by shifting the normalized
        `input_value` towards those.

        Parameters
        ----------
        param : NumericalHyperparameter
            Parameter of the original parameter space.

        input_value : float
            Normalized value for this parameter, as suggested by the underlying optimizer.

        Returns
        -------
        biased_value : float
            Normalized value after special value(s) biasing is applied.
        """
        special_values_list = self._special_param_values_dict[param.name]

        # Check if input value corresponds to some special value: each special
        # value owns a consecutive slice of [0, 1) as wide as its biasing percentage.
        perc_sum = 0.0
        for special_value, biasing_perc in special_values_list:
            perc_sum += biasing_perc
            if input_value < perc_sum:
                return float(param.to_vector(special_value))

        # Scale input value uniformly to non-special values
        return float(param.to_vector((input_value - perc_sum) / (1 - perc_sum)))

    # pylint: disable=too-complex,too-many-branches
    def _validate_special_param_values(self, special_param_values_dict: dict) -> None:
        """
        Checks that the user-provided dict of special parameter values is valid. And
        assigns it to the corresponding attribute.

        The sanitized form stored in ``_special_param_values_dict`` maps each
        parameter name to a list of ``(special value, biasing percentage)`` tuples.

        Parameters
        ----------
        special_param_values_dict : dict
            User-provided dict of special parameter values.

        Raises
        ------
        ValueError: if dictionary key, value, or structure is invalid.
        NotImplementedError: if special value is defined for a non-integer parameter
        """
        # Trailing space keeps the prefix separated from the detail that follows it.
        error_prefix = "Validation of special parameter values dict failed. "

        all_parameters = list(self.orig_parameter_space.keys())
        sanitized_dict = {}

        for param, value in special_param_values_dict.items():
            if param not in all_parameters:
                raise ValueError(error_prefix + f"Parameter '{param}' does not exist.")

            hyperparameter = self.orig_parameter_space[param]
            if not isinstance(hyperparameter, ConfigSpace.UniformIntegerHyperparameter):
                raise NotImplementedError(
                    error_prefix + f"Parameter '{param}' is not supported. "
                    "Only Integer Hyperparameters are currently supported."
                )

            if isinstance(value, int):
                # User specifies a single special value -- default biasing percentage is used
                tuple_list = [(value, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE)]
            elif isinstance(value, tuple) and [type(v) for v in value] == [int, float]:
                # User specifies both special value and biasing percentage
                tuple_list = [value]
            elif isinstance(value, list) and value:
                if all(isinstance(t, int) for t in value):
                    # User specifies list of special values
                    tuple_list = [
                        (v, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE) for v in value
                    ]
                elif all(
                    isinstance(t, tuple) and [type(v) for v in t] == [int, float] for t in value
                ):
                    # User specifies list of tuples; each tuple defines the special
                    # value and the biasing percentage.
                    # Copy the list so later mutations of the caller's list
                    # cannot bypass the validation performed below.
                    tuple_list = list(value)
                else:
                    raise ValueError(
                        error_prefix + f"Invalid format in value list for parameter '{param}'. "
                        f"Special value list should contain either integers, "
                        "or (special value, biasing %) tuples."
                    )
            else:
                raise ValueError(
                    error_prefix + f"Invalid format for parameter '{param}'. Dict value should be "
                    "an int, a (int, float) tuple, a list of integers, "
                    "or a list of (int, float) tuples."
                )

            # Are user-specified special values valid?
            if not all(hyperparameter.lower <= v <= hyperparameter.upper for v, _ in tuple_list):
                raise ValueError(
                    error_prefix
                    + "One (or more) special values are outside of parameter "
                    + f"'{param}' value domain."
                )
            # Are user-provided special values unique?
            if len(set(v for v, _ in tuple_list)) != len(tuple_list):
                raise ValueError(
                    error_prefix
                    + "One (or more) special values are defined more than once "
                    + f"for parameter '{param}'."
                )
            # Are biasing percentages valid?
            if not all(0 < perc < 1 for _, perc in tuple_list):
                raise ValueError(
                    error_prefix
                    + f"One (or more) biasing percentages for parameter '{param}' are invalid: "
                    "i.e., fall outside (0, 1) range."
                )

            total_percentage = sum(perc for _, perc in tuple_list)
            if total_percentage >= 1.0:
                raise ValueError(
                    error_prefix
                    + f"Total special values percentage for parameter '{param}' surpass 100%."
                )
            # ... and reasonable?
            if total_percentage >= 0.5:
                warn(
                    f"Total special values percentage for parameter '{param}' exceeds 50%.",
                    UserWarning,
                )

            sanitized_dict[param] = tuple_list

        self._special_param_values_dict = sanitized_dict

    def _try_generate_approx_inverse_mapping(self) -> None:
        """Tries to generate an approximate reverse mapping:
        i.e., from high-dimensional space to the low-dimensional one.

        Reverse mapping is generated using the pseudo-inverse matrix, of original
        HeSBO projection matrix.
        This mapping can be potentially used to register configurations that were
        *not* previously suggested by the optimizer.

        NOTE: This method is experimental, and there is currently no guarantee that
        it works as expected.

        Raises
        ------
        RuntimeError: if reverse mapping computation fails.
        """
        from scipy.linalg import (  # pylint: disable=import-outside-toplevel
            LinAlgError,
            pinv,
        )

        warn(
            (
                "Trying to register a configuration that was not "
                "previously suggested by the optimizer.\n"
                "This inverse configuration transformation is typically not supported.\n"
                "However, we will try to register this configuration "
                "using an *experimental* method."
            ),
            UserWarning,
        )

        orig_space_num_dims = len(list(self.orig_parameter_space.values()))
        target_space_num_dims = len(list(self.target_parameter_space.values()))

        # Construct dense projection matrix from sparse repr: row `i` has a single
        # non-zero entry, `_sigma_vector[i]`, located at column `_h_matrix[i]`.
        proj_matrix = np.zeros(shape=(orig_space_num_dims, target_space_num_dims))
        proj_matrix[np.arange(orig_space_num_dims), self._h_matrix] = self._sigma_vector

        # Compute pseudo-inverse matrix
        try:
            inv_matrix: npt.NDArray = pinv(proj_matrix)
            self._pinv_matrix = inv_matrix
        except LinAlgError as err:
            raise RuntimeError(
                f"Unable to generate reverse mapping using pseudo-inverse matrix: {repr(err)}"
            ) from err
        assert self._pinv_matrix.shape == (target_space_num_dims, orig_space_num_dims)