Coverage for mlos_core/mlos_core/spaces/adapters/llamatune.py: 95% (180 statements)

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Implementation of LlamaTune space adapter."""
import os
from typing import Dict, List, Optional, Union
from warnings import warn

import ConfigSpace
import ConfigSpace.exceptions
import numpy as np
import numpy.typing as npt
import pandas as pd
from ConfigSpace.hyperparameters import NumericalHyperparameter
from sklearn.preprocessing import MinMaxScaler

from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
from mlos_core.util import drop_nulls, normalize_config


class LlamaTuneAdapter(BaseSpaceAdapter):  # pylint: disable=too-many-instance-attributes
    """Implementation of LlamaTune, a set of parameter space transformation techniques
    aimed at improving the sample-efficiency of the underlying optimizer.
    """

    DEFAULT_NUM_LOW_DIMS = 16
    """Default number of dimensions in the low-dimensional search space, generated by
    HeSBO projection.
    """

    DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE = 0.2
    """Default percentage of bias for each special parameter value."""

    DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM = 10000
    """Default maximum number of unique values for each parameter, when space
    discretization is used.
    """
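
    # A minimal usage sketch (illustrative only; the toy 8-parameter space and
    # the `num_low_dims` value below are made up for the example):
    #
    #   space = ConfigSpace.ConfigurationSpace(seed=1234)
    #   space.add([
    #       ConfigSpace.UniformIntegerHyperparameter(f"p{i}", lower=0, upper=100)
    #       for i in range(8)
    #   ])
    #   adapter = LlamaTuneAdapter(orig_parameter_space=space, num_low_dims=2)
    #   # The optimizer then explores adapter.target_parameter_space (2 dims),
    #   # and adapter.transform() maps each suggestion back to the 8-dim space.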

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        orig_parameter_space: ConfigSpace.ConfigurationSpace,
        num_low_dims: int = DEFAULT_NUM_LOW_DIMS,
        special_param_values: Optional[dict] = None,
        max_unique_values_per_param: Optional[int] = DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM,
        use_approximate_reverse_mapping: bool = False,
    ):
        """
        Create a space adapter that employs LlamaTune's techniques.

        Parameters
        ----------
        orig_parameter_space : ConfigSpace.ConfigurationSpace
            The original (user-provided) parameter space to optimize.
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.
        special_param_values : Optional[dict]
            Dictionary of special parameter values to bias the optimizer towards.
            Keys are parameter names; each value is a single special value, a
            (special value, biasing percentage) tuple, or a list of either.
        max_unique_values_per_param : Optional[int]
            Number of unique values per parameter. Used to discretize the parameter
            space. If `None`, space discretization is disabled.
        use_approximate_reverse_mapping : bool
            Whether to use an (experimental) approximate reverse mapping, based on
            the pseudo-inverse of the projection matrix, for configurations that
            were not previously suggested by the optimizer.
        """
        super().__init__(orig_parameter_space=orig_parameter_space)

        if num_low_dims >= len(orig_parameter_space):
            raise ValueError(
                "Number of target config space dimensions should be "
                "less than those of original config space."
            )

        # Validate input special param values dict
        special_param_values = special_param_values or {}
        self._validate_special_param_values(special_param_values)

        # Create low-dimensional parameter search space
        self._construct_low_dim_space(num_low_dims, max_unique_values_per_param)

        # Initialize config values scaler: from (-1, 1) to (0, 1) range
        config_scaler = MinMaxScaler(feature_range=(0, 1))
        ones_vector = np.ones(len(list(self.orig_parameter_space.values())))
        config_scaler.fit([-ones_vector, ones_vector])
        self._config_scaler = config_scaler

        # Generate random mapping from low-dimensional space to original config space
        num_orig_dims = len(list(self.orig_parameter_space.values()))
        self._h_matrix = self._random_state.choice(range(num_low_dims), num_orig_dims)
        self._sigma_vector = self._random_state.choice([-1, 1], num_orig_dims)

        # Used to retrieve the low-dim point, given the high-dim one
        self._suggested_configs: Dict[ConfigSpace.Configuration, ConfigSpace.Configuration] = {}
        self._pinv_matrix: npt.NDArray
        self._use_approximate_reverse_mapping = use_approximate_reverse_mapping
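
        # Illustrative sketch of the mapping above (toy dimensions): with
        # num_low_dims=2 and 4 original dims, _h_matrix might come out as
        # [0, 1, 1, 0] and _sigma_vector as [1, -1, 1, 1]; original dim i then
        # tracks sigma_vector[i] * low_dim_point[h_matrix[i]], i.e., each
        # original parameter follows exactly one (possibly sign-flipped)
        # low-dim coordinate, as used in _transform() below.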

    @property
    def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace:
        """Get the parameter space, which is explored by the underlying optimizer."""
        return self._target_config_space

    def inverse_transform(self, configurations: pd.DataFrame) -> pd.DataFrame:
        target_configurations = []
        for _, config in configurations.astype("O").iterrows():
            configuration = ConfigSpace.Configuration(
                self.orig_parameter_space,
                values=drop_nulls(config.to_dict()),
            )

            target_config = self._suggested_configs.get(configuration, None)
            # NOTE: HeSBO is a non-linear projection method, and does not inherently
            # support inverse projection.
            # To (partly) support this operation, we keep track of the suggested
            # low-dim point(s) along with the respective high-dim point; this way we
            # can retrieve the low-dim point from its high-dim counterpart.
            if target_config is None:
                # Inherently, it is not supported to register points that were not
                # suggested by the optimizer.
                if configuration == self.orig_parameter_space.get_default_configuration():
                    # Default configuration should always be registerable.
                    pass
                elif not self._use_approximate_reverse_mapping:
                    raise ValueError(
                        f"{repr(configuration)}\n"
                        "The above configuration was not suggested by the optimizer. "
                        "Approximate reverse mapping is currently disabled; "
                        "thus *only* configurations suggested "
                        "previously by the optimizer can be registered."
                    )
                # else ...
                target_config = self._try_inverse_transform_config(configuration)

            target_configurations.append(target_config)

        return pd.DataFrame(
            target_configurations,
            columns=list(self.target_parameter_space.keys()),
        )

    def _try_inverse_transform_config(
        self,
        config: ConfigSpace.Configuration,
    ) -> ConfigSpace.Configuration:
        """
        Attempts to generate an inverse mapping of the given configuration that wasn't
        previously registered.

        Parameters
        ----------
        config : ConfigSpace.Configuration
            Configuration in the original high-dimensional space.

        Returns
        -------
        ConfigSpace.Configuration
            Configuration in the low-dimensional space.

        Raises
        ------
        ValueError
            On conversion errors.
        """
        # ...yet, we try to support that by implementing an approximate
        # reverse mapping using the pseudo-inverse matrix.
        if getattr(self, "_pinv_matrix", None) is None:
            self._try_generate_approx_inverse_mapping()

        # Replace NaNs with zeros for inactive hyperparameters
        config_vector = np.nan_to_num(config.get_array(), nan=0.0)
        # Perform approximate reverse mapping
        # NOTE: applying special value biasing is not possible
        vector: npt.NDArray = self._config_scaler.inverse_transform([config_vector])[0]
        target_config_vector: npt.NDArray = self._pinv_matrix.dot(vector)
        # Clip values to the [-1, 1] range of the low-dimensional space.
        for idx, value in enumerate(target_config_vector):
            target_config_vector[idx] = np.clip(value, -1, 1)
        if self._q_scaler is not None:
            # If max_unique_values_per_param is set, we need to scale
            # the low-dimensional space back to the discretized space as well.
            target_config_vector = self._q_scaler.inverse_transform([target_config_vector])[0]
            assert isinstance(target_config_vector, np.ndarray)
            # Clip values to the [1, max_value] range (floating point errors may occur).
            for idx, value in enumerate(target_config_vector):
                target_config_vector[idx] = int(np.clip(value, 1, self._q_scaler.data_max_[idx]))
            target_config_vector = target_config_vector.astype(int)
        # Convert the vector to a dictionary.
        target_config_dict = dict(
            zip(
                self.target_parameter_space.keys(),
                target_config_vector,
            )
        )
        target_config = ConfigSpace.Configuration(
            self.target_parameter_space,
            values=target_config_dict,
            # This method results in hyperparameter type conversion issues
            # (e.g., float instead of int), so we use the values dict instead.
            # vector=target_config_vector,
        )

        # Check to see if the approximate reverse mapping looks OK.
        # Note: we know this isn't 100% accurate, so this is just a warning and
        # mostly meant for internal debugging.
        configuration_dict = dict(config)
        double_checked_config = self._transform(dict(target_config))
        double_checked_config = {
            # Skip the special values that aren't in the original space.
            k: v
            for k, v in double_checked_config.items()
            if k in configuration_dict
        }
        if double_checked_config != configuration_dict and (
            os.environ.get("MLOS_DEBUG", "false").lower() in {"1", "true", "y", "yes"}
        ):
            warn(
                (
                    f"Note: Configuration {configuration_dict} was inverse transformed to "
                    f"{dict(target_config)} and then back to {double_checked_config}. "
                    "This is an approximate reverse mapping for previously unregistered "
                    "configurations, so this is just a warning."
                ),
                UserWarning,
            )

        # But the inverse mapping should at least be valid in the target space.
        try:
            ConfigSpace.Configuration(
                self.target_parameter_space,
                values=target_config,
            ).check_valid_configuration()
        except ConfigSpace.exceptions.IllegalValueError as e:
            raise ValueError(
                f"Invalid configuration {target_config} generated by "
                f"inverse mapping of {config}:\n{e}"
            ) from e

        return target_config

    def transform(self, configuration: pd.DataFrame) -> pd.DataFrame:
        if len(configuration) != 1:
            raise ValueError(
                "Configuration dataframe must contain exactly 1 row. "
                f"Found {len(configuration)} rows."
            )

        target_values_dict = configuration.iloc[0].to_dict()
        target_configuration = ConfigSpace.Configuration(
            self.target_parameter_space,
            values=target_values_dict,
        )

        orig_values_dict = self._transform(target_values_dict)
        orig_configuration = normalize_config(self.orig_parameter_space, orig_values_dict)

        # Validate that the configuration is in the original space.
        try:
            ConfigSpace.Configuration(
                self.orig_parameter_space,
                values=orig_configuration,
            ).check_valid_configuration()
        except ConfigSpace.exceptions.IllegalValueError as e:
            raise ValueError(
                f"Invalid configuration {orig_configuration} generated by "
                f"transformation of {target_configuration}:\n{e}"
            ) from e

        # Add to inverse dictionary -- needed for registering the performance later
        self._suggested_configs[orig_configuration] = target_configuration

        return pd.DataFrame(
            [list(orig_configuration.values())], columns=list(orig_configuration.keys())
        )
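
    # Illustrative round-trip sketch (hypothetical values; assumes an `adapter`
    # built with num_low_dims=2 and max_unique_values_per_param=None):
    #
    #   low_df = pd.DataFrame([{"dim_0": 0.25, "dim_1": -0.8}])
    #   high_df = adapter.transform(low_df)  # single row in the original space
    #   low_df_again = adapter.inverse_transform(high_df)  # recovered via the
    #   # suggested-configs cache above, not via an analytic inverse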

    def _construct_low_dim_space(
        self,
        num_low_dims: int,
        max_unique_values_per_param: Optional[int],
    ) -> None:
        """
        Constructs the low-dimensional (potentially discretized) parameter search space.

        Parameters
        ----------
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.

        max_unique_values_per_param : Optional[int]
            Number of unique values per parameter. Used to discretize the parameter
            space. If `None`, space discretization is disabled.
        """
        # Define target space parameters
        q_scaler = None
        hyperparameters: List[
            Union[ConfigSpace.UniformFloatHyperparameter, ConfigSpace.UniformIntegerHyperparameter]
        ]
        if max_unique_values_per_param is None:
            hyperparameters = [
                ConfigSpace.UniformFloatHyperparameter(name=f"dim_{idx}", lower=-1, upper=1)
                for idx in range(num_low_dims)
            ]
        else:
            # Currently supported optimizers do not support defining a discretized
            # space (like ConfigSpace does using the `q` kwarg).
            # Thus, to support space discretization, we define the low-dimensional
            # space using integer hyperparameters.
            # We also employ a scaler, which scales suggested values to the [-1, 1]
            # range used by HeSBO projection.
            hyperparameters = [
                ConfigSpace.UniformIntegerHyperparameter(
                    name=f"dim_{idx}",
                    lower=1,
                    upper=max_unique_values_per_param,
                )
                for idx in range(num_low_dims)
            ]

            # Initialize quantized values scaler:
            # from the [1, max_unique_values_per_param] to the (-1, 1) range
            q_scaler = MinMaxScaler(feature_range=(-1, 1))
            ones_vector = np.ones(num_low_dims)
            max_value_vector = ones_vector * max_unique_values_per_param
            q_scaler.fit([ones_vector, max_value_vector])

        self._q_scaler = q_scaler

        # Construct low-dimensional parameter search space
        config_space = ConfigSpace.ConfigurationSpace(name=self.orig_parameter_space.name)
        # use same random state as in original parameter space
        config_space.random = self._random_state
        config_space.add(hyperparameters)
        self._target_config_space = config_space
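
    # Quick sketch of the discretization scaler's behavior (toy values, not
    # part of the class API): fit on [1, 10000] with feature_range=(-1, 1),
    #
    #   q = MinMaxScaler(feature_range=(-1, 1))
    #   q.fit([[1.0], [10000.0]])
    #   q.transform([[1.0]])      # -> [[-1.0]]
    #   q.transform([[5000.5]])   # -> [[0.0]]
    #   q.transform([[10000.0]])  # -> [[1.0]]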

    def _transform(self, configuration: dict) -> dict:
        """
        Projects a low-dimensional point (configuration) to the high-dimensional
        original parameter space, and then biases the resulting parameter values
        towards their special value(s) (if any).

        Parameters
        ----------
        configuration : dict
            Configuration in the low-dimensional space.

        Returns
        -------
        configuration : dict
            Projected configuration in the high-dimensional original search space.
        """
        original_parameters = list(self.orig_parameter_space.values())
        low_dim_config_values = list(configuration.values())

        if self._q_scaler is not None:
            # Scale parameter values from [1, max_value] to [-1, 1]
            low_dim_config_values = self._q_scaler.transform([low_dim_config_values])[0]

        # Project low-dim point to original parameter space
        original_config_values = [
            self._sigma_vector[idx] * low_dim_config_values[self._h_matrix[idx]]
            for idx in range(len(original_parameters))
        ]
        # Scale parameter values to [0, 1]
        original_config_values = self._config_scaler.transform([original_config_values])[0]

        original_config = {}
        for param, norm_value in zip(original_parameters, original_config_values):
            # Clip value to force it to fall in [0, 1]
            # NOTE: HeSBO projection ensures that theoretically, but due to
            # floating point ops nuances this is not always guaranteed
            value = np.clip(norm_value, 0, 1)

            if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                index = int(value * len(param.choices))  # truncate the fractional part
                index = max(0, min(len(param.choices) - 1, index))
                # NOTE: potential rounding here would be unfair to first & last values
                orig_value = param.choices[index]
            elif isinstance(param, NumericalHyperparameter):
                if param.name in self._special_param_values_dict:
                    value = self._special_param_value_scaler(param, value)

                orig_value = param.to_value(value)
                orig_value = np.clip(orig_value, param.lower, param.upper)
            else:
                raise NotImplementedError(
                    "Only Categorical, Integer, and Float hyperparameters are currently supported."
                )

            original_config[param.name] = orig_value

        return original_config
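
    # Worked micro-example of the categorical mapping above (toy values): for
    # choices=("a", "b", "c") and a normalized value of 0.49,
    # index = int(0.49 * 3) = 1 -> "b"; a value of exactly 1.0 would yield
    # index 3, which the min() clamp reduces to the last valid index, 2 -> "c".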

    def _special_param_value_scaler(
        self,
        param: NumericalHyperparameter,
        input_value: float,
    ) -> float:
        """
        Biases the special value(s) of this parameter, by shifting the normalized
        `input_value` towards them.

        Parameters
        ----------
        param : NumericalHyperparameter
            Parameter of the original parameter space.

        input_value : float
            Normalized value for this parameter, as suggested by the underlying optimizer.

        Returns
        -------
        biased_value : float
            Normalized value after special value(s) biasing is applied.
        """
        special_values_list = self._special_param_values_dict[param.name]

        # Check if input value corresponds to some special value
        perc_sum = 0.0
        for special_value, biasing_perc in special_values_list:
            perc_sum += biasing_perc
            if input_value < perc_sum:
                return float(param.to_vector(special_value))

        # Scale input value uniformly to non-special values
        return float(param.to_vector((input_value - perc_sum) / (1 - perc_sum)))
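
    # Worked micro-example of the biasing above (toy values): suppose a
    # parameter has one special value 0 with a 20% bias, i.e.,
    # special_values_list = [(0, 0.2)]. Then input_value=0.1 (< 0.2) maps to
    # the special value 0, while input_value=0.6 is rescaled uniformly to
    # (0.6 - 0.2) / (1 - 0.2) = 0.5 before conversion back to the vector space.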

    # pylint: disable=too-complex,too-many-branches
    def _validate_special_param_values(self, special_param_values_dict: dict) -> None:
        """
        Checks that the user-provided dict of special parameter values is valid, and
        assigns it to the corresponding attribute.

        Parameters
        ----------
        special_param_values_dict : dict
            User-provided dict of special parameter values.

        Raises
        ------
        ValueError: if dictionary key, value, or structure is invalid.
        NotImplementedError: if a special value is defined for a non-integer parameter.
        """
        error_prefix = "Validation of special parameter values dict failed. "

        all_parameters = list(self.orig_parameter_space.keys())
        sanitized_dict = {}

        for param, value in special_param_values_dict.items():
            if param not in all_parameters:
                raise ValueError(error_prefix + f"Parameter '{param}' does not exist.")

            hyperparameter = self.orig_parameter_space[param]
            if not isinstance(hyperparameter, ConfigSpace.UniformIntegerHyperparameter):
                raise NotImplementedError(
                    error_prefix + f"Parameter '{param}' is not supported. "
                    "Only Integer Hyperparameters are currently supported."
                )

            if isinstance(value, int):
                # User specifies a single special value -- default biasing percentage is used
                tuple_list = [(value, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE)]
            elif isinstance(value, tuple) and [type(v) for v in value] == [int, float]:
                # User specifies both special value and biasing percentage
                tuple_list = [value]
            elif isinstance(value, list) and value:
                if all(isinstance(t, int) for t in value):
                    # User specifies a list of special values
                    tuple_list = [
                        (v, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE) for v in value
                    ]
                elif all(
                    isinstance(t, tuple) and [type(v) for v in t] == [int, float] for t in value
                ):
                    # User specifies a list of tuples; each tuple defines the special
                    # value and the biasing percentage
                    tuple_list = value
                else:
                    raise ValueError(
                        error_prefix + f"Invalid format in value list for parameter '{param}'. "
                        "Special value list should contain either integers, "
                        "or (special value, biasing %) tuples."
                    )
            else:
                raise ValueError(
                    error_prefix + f"Invalid format for parameter '{param}'. Dict value should be "
                    "an int, an (int, float) tuple, a list of integers, "
                    "or a list of (int, float) tuples."
                )

            # Are the user-specified special values valid?
            if not all(hyperparameter.lower <= v <= hyperparameter.upper for v, _ in tuple_list):
                raise ValueError(
                    error_prefix
                    + "One (or more) special values are outside of parameter "
                    + f"'{param}' value domain."
                )
            # Are the user-provided special values unique?
            if len(set(v for v, _ in tuple_list)) != len(tuple_list):
                raise ValueError(
                    error_prefix
                    + "One (or more) special values are defined more than once "
                    + f"for parameter '{param}'."
                )
            # Are the biasing percentages valid?
            if not all(0 < perc < 1 for _, perc in tuple_list):
                raise ValueError(
                    error_prefix
                    + f"One (or more) biasing percentages for parameter '{param}' are invalid: "
                    "i.e., they fall outside the (0, 1) range."
                )

            total_percentage = sum(perc for _, perc in tuple_list)
            if total_percentage >= 1.0:
                raise ValueError(
                    error_prefix
                    + f"Total special values percentage for parameter '{param}' surpasses 100%."
                )
            # ... and reasonable?
            if total_percentage >= 0.5:
                warn(
                    f"Total special values percentage for parameter '{param}' exceeds 50%.",
                    UserWarning,
                )

            sanitized_dict[param] = tuple_list

        self._special_param_values_dict = sanitized_dict
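
    # Illustrative examples of accepted `special_param_values` formats (the
    # parameter names and values below are made up):
    #
    #   {"vm.swappiness": 0}                # single value, default 20% bias
    #   {"vm.swappiness": (0, 0.3)}         # single value, explicit 30% bias
    #   {"shared_buffers": [128, 1024]}     # two values, default bias each
    #   {"shared_buffers": [(128, 0.1), (1024, 0.2)]}  # explicit bias per value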

    def _try_generate_approx_inverse_mapping(self) -> None:
        """Tries to generate an approximate reverse mapping:
        i.e., from the high-dimensional space to the low-dimensional one.

        The reverse mapping is generated using the pseudo-inverse of the original
        HeSBO projection matrix.
        This mapping can potentially be used to register configurations that were
        *not* previously suggested by the optimizer.

        NOTE: This method is experimental, and there is currently no guarantee that
        it works as expected.

        Raises
        ------
        RuntimeError: if reverse mapping computation fails.
        """
        from scipy.linalg import (  # pylint: disable=import-outside-toplevel
            LinAlgError,
            pinv,
        )

        warn(
            (
                "Trying to register a configuration that was not "
                "previously suggested by the optimizer.\n"
                "This inverse configuration transformation is typically not supported.\n"
                "However, we will try to register this configuration "
                "using an *experimental* method."
            ),
            UserWarning,
        )

        orig_space_num_dims = len(list(self.orig_parameter_space.values()))
        target_space_num_dims = len(list(self.target_parameter_space.values()))

        # Construct the dense projection matrix from its sparse representation
        proj_matrix = np.zeros(shape=(orig_space_num_dims, target_space_num_dims))
        for row, col in enumerate(self._h_matrix):
            proj_matrix[row][col] = self._sigma_vector[row]

        # Compute the pseudo-inverse matrix
        try:
            self._pinv_matrix = pinv(proj_matrix)
        except LinAlgError as err:
            raise RuntimeError(
                f"Unable to generate reverse mapping using pseudo-inverse matrix: {repr(err)}"
            ) from err
        assert self._pinv_matrix.shape == (target_space_num_dims, orig_space_num_dims)
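
# Worked micro-example of the reverse mapping above (toy dimensions,
# illustrative only): with 3 original dims, 2 low dims, h_matrix=[0, 1, 0],
# and sigma_vector=[1, -1, 1], the dense projection matrix is
#
#   P = [[ 1,  0],
#        [ 0, -1],
#        [ 1,  0]]
#
# scipy.linalg.pinv(P) then has shape (2, 3); applying it to a (scaled)
# high-dim configuration vector yields a least-squares estimate of the
# low-dim point that could have produced it.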