Coverage for mlos_core/mlos_core/spaces/adapters/llamatune.py: 95% (180 statements)

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Implementation of LlamaTune space adapter."""
import os
from typing import Dict, List, Optional, Union
from warnings import warn

import ConfigSpace
import ConfigSpace.exceptions
import numpy as np
import numpy.typing as npt
import pandas as pd
from ConfigSpace.hyperparameters import NumericalHyperparameter
from sklearn.preprocessing import MinMaxScaler

from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
from mlos_core.util import drop_nulls, normalize_config


class LlamaTuneAdapter(BaseSpaceAdapter):  # pylint: disable=too-many-instance-attributes
    """Implementation of LlamaTune, a set of parameter space transformation techniques
    aimed at improving the sample-efficiency of the underlying optimizer.
    """

    DEFAULT_NUM_LOW_DIMS = 16
    """Default number of dimensions in the low-dimensional search space, generated by
    HeSBO projection.
    """

    DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE = 0.2
    """Default percentage of bias for each special parameter value."""

    DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM = 10000
    """Default maximum number of unique values for each parameter, when space
    discretization is used.
    """
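
    # A minimal usage sketch (illustrative only; the toy 8-parameter space and
    # the `num_low_dims` value below are made up for the example):
    #
    #   space = ConfigSpace.ConfigurationSpace(seed=1234)
    #   space.add([
    #       ConfigSpace.UniformIntegerHyperparameter(f"p{i}", lower=0, upper=100)
    #       for i in range(8)
    #   ])
    #   adapter = LlamaTuneAdapter(orig_parameter_space=space, num_low_dims=2)
    #   # The optimizer then explores adapter.target_parameter_space (2 dims),
    #   # and adapter.transform() maps each suggestion back to the 8-dim space.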

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        orig_parameter_space: ConfigSpace.ConfigurationSpace,
        num_low_dims: int = DEFAULT_NUM_LOW_DIMS,
        special_param_values: Optional[dict] = None,
        max_unique_values_per_param: Optional[int] = DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM,
        use_approximate_reverse_mapping: bool = False,
    ):
        """
        Create a space adapter that employs LlamaTune's techniques.

        Parameters
        ----------
        orig_parameter_space : ConfigSpace.ConfigurationSpace
            The original (user-provided) parameter space to optimize.
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.
        special_param_values : Optional[dict]
            Dictionary of special parameter values to bias the optimizer towards.
            Keys are parameter names; each value is a single special value, a
            (special value, biasing percentage) tuple, or a list of either.
        max_unique_values_per_param : Optional[int]
            Number of unique values per parameter. Used to discretize the parameter
            space. If `None`, space discretization is disabled.
        use_approximate_reverse_mapping : bool
            Whether to use an (experimental) approximate reverse mapping, based on
            the pseudo-inverse of the projection matrix, for configurations that
            were not previously suggested by the optimizer.
        """
        super().__init__(orig_parameter_space=orig_parameter_space)

        if num_low_dims >= len(orig_parameter_space):
            raise ValueError(
                "Number of target config space dimensions should be "
                "less than those of original config space."
            )

        # Validate input special param values dict
        special_param_values = special_param_values or {}
        self._validate_special_param_values(special_param_values)

        # Create low-dimensional parameter search space
        self._construct_low_dim_space(num_low_dims, max_unique_values_per_param)

        # Initialize config values scaler: from (-1, 1) to (0, 1) range
        config_scaler = MinMaxScaler(feature_range=(0, 1))
        ones_vector = np.ones(len(list(self.orig_parameter_space.values())))
        config_scaler.fit([-ones_vector, ones_vector])
        self._config_scaler = config_scaler

        # Generate random mapping from low-dimensional space to original config space
        num_orig_dims = len(list(self.orig_parameter_space.values()))
        self._h_matrix = self._random_state.choice(range(num_low_dims), num_orig_dims)
        self._sigma_vector = self._random_state.choice([-1, 1], num_orig_dims)

        # Used to retrieve the low-dim point, given the high-dim one
        self._suggested_configs: Dict[ConfigSpace.Configuration, ConfigSpace.Configuration] = {}
        self._pinv_matrix: npt.NDArray
        self._use_approximate_reverse_mapping = use_approximate_reverse_mapping
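
        # Illustrative sketch of the mapping above (toy dimensions): with
        # num_low_dims=2 and 4 original dims, _h_matrix might come out as
        # [0, 1, 1, 0] and _sigma_vector as [1, -1, 1, 1]; original dim i then
        # tracks sigma_vector[i] * low_dim_point[h_matrix[i]], i.e., each
        # original parameter follows exactly one (possibly sign-flipped)
        # low-dim coordinate, as used in _transform() below.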

    @property
    def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace:
        """Get the parameter space, which is explored by the underlying optimizer."""
        return self._target_config_space

    def inverse_transform(self, configurations: pd.DataFrame) -> pd.DataFrame:
        target_configurations = []
        for _, config in configurations.astype("O").iterrows():
            configuration = ConfigSpace.Configuration(
                self.orig_parameter_space,
                values=drop_nulls(config.to_dict()),
            )

            target_config = self._suggested_configs.get(configuration, None)
            # NOTE: HeSBO is a non-linear projection method, and does not inherently
            # support inverse projection.
            # To (partly) support this operation, we keep track of the suggested
            # low-dim point(s) along with the respective high-dim point; this way we
            # can retrieve the low-dim point from its high-dim counterpart.
            if target_config is None:
                # Inherently, it is not supported to register points that were not
                # suggested by the optimizer.
                if configuration == self.orig_parameter_space.get_default_configuration():
                    # Default configuration should always be registerable.
                    pass
                elif not self._use_approximate_reverse_mapping:
                    raise ValueError(
                        f"{repr(configuration)}\n"
                        "The above configuration was not suggested by the optimizer. "
                        "Approximate reverse mapping is currently disabled; "
                        "thus *only* configurations suggested "
                        "previously by the optimizer can be registered."
                    )
                # else ...
                target_config = self._try_inverse_transform_config(configuration)

            target_configurations.append(target_config)

        return pd.DataFrame(
            target_configurations,
            columns=list(self.target_parameter_space.keys()),
        )

    def _try_inverse_transform_config(
        self,
        config: ConfigSpace.Configuration,
    ) -> ConfigSpace.Configuration:
        """
        Attempts to generate an inverse mapping of the given configuration that wasn't
        previously registered.

        Parameters
        ----------
        config : ConfigSpace.Configuration
            Configuration in the original high-dimensional space.

        Returns
        -------
        ConfigSpace.Configuration
            Configuration in the low-dimensional space.

        Raises
        ------
        ValueError
            On conversion errors.
        """
        # ...yet, we try to support that by implementing an approximate
        # reverse mapping using the pseudo-inverse matrix.
        if getattr(self, "_pinv_matrix", None) is None:
            self._try_generate_approx_inverse_mapping()

        # Replace NaNs with zeros for inactive hyperparameters
        config_vector = np.nan_to_num(config.get_array(), nan=0.0)
        # Perform approximate reverse mapping
        # NOTE: applying special value biasing is not possible
        vector: npt.NDArray = self._config_scaler.inverse_transform([config_vector])[0]
        target_config_vector: npt.NDArray = self._pinv_matrix.dot(vector)
        # Clip values to the [-1, 1] range of the low-dimensional space.
        for idx, value in enumerate(target_config_vector):
            target_config_vector[idx] = np.clip(value, -1, 1)
        if self._q_scaler is not None:
            # If max_unique_values_per_param is set, we need to scale
            # the low-dimensional space back to the discretized space as well.
            target_config_vector = self._q_scaler.inverse_transform([target_config_vector])[0]
            assert isinstance(target_config_vector, np.ndarray)
            # Clip values to the [1, max_value] range (floating point errors may occur).
            for idx, value in enumerate(target_config_vector):
                target_config_vector[idx] = int(np.clip(value, 1, self._q_scaler.data_max_[idx]))
            target_config_vector = target_config_vector.astype(int)
        # Convert the vector to a dictionary.
        target_config_dict = dict(
            zip(
                self.target_parameter_space.keys(),
                target_config_vector,
            )
        )
        target_config = ConfigSpace.Configuration(
            self.target_parameter_space,
            values=target_config_dict,
            # This method results in hyperparameter type conversion issues
            # (e.g., float instead of int), so we use the values dict instead.
            # vector=target_config_vector,
        )

        # Check to see if the approximate reverse mapping looks OK.
        # Note: we know this isn't 100% accurate, so this is just a warning and
        # mostly meant for internal debugging.
        configuration_dict = dict(config)
        double_checked_config = self._transform(dict(target_config))
        double_checked_config = {
            # Skip the special values that aren't in the original space.
            k: v
            for k, v in double_checked_config.items()
            if k in configuration_dict
        }
        if double_checked_config != configuration_dict and (
            os.environ.get("MLOS_DEBUG", "false").lower() in {"1", "true", "y", "yes"}
        ):
            warn(
                (
                    f"Note: Configuration {configuration_dict} was inverse transformed to "
                    f"{dict(target_config)} and then back to {double_checked_config}. "
                    "This is an approximate reverse mapping for previously unregistered "
                    "configurations, so this is just a warning."
                ),
                UserWarning,
            )

        # But the inverse mapping should at least be valid in the target space.
        try:
            ConfigSpace.Configuration(
                self.target_parameter_space,
                values=target_config,
            ).check_valid_configuration()
        except ConfigSpace.exceptions.IllegalValueError as e:
            raise ValueError(
                f"Invalid configuration {target_config} generated by "
                f"inverse mapping of {config}:\n{e}"
            ) from e

        return target_config

    def transform(self, configuration: pd.DataFrame) -> pd.DataFrame:
        if len(configuration) != 1:
            raise ValueError(
                "Configuration dataframe must contain exactly 1 row. "
                f"Found {len(configuration)} rows."
            )

        target_values_dict = configuration.iloc[0].to_dict()
        target_configuration = ConfigSpace.Configuration(
            self.target_parameter_space,
            values=target_values_dict,
        )

        orig_values_dict = self._transform(target_values_dict)
        orig_configuration = normalize_config(self.orig_parameter_space, orig_values_dict)

        # Validate that the configuration is in the original space.
        try:
            ConfigSpace.Configuration(
                self.orig_parameter_space,
                values=orig_configuration,
            ).check_valid_configuration()
        except ConfigSpace.exceptions.IllegalValueError as e:
            raise ValueError(
                f"Invalid configuration {orig_configuration} generated by "
                f"transformation of {target_configuration}:\n{e}"
            ) from e

        # Add to inverse dictionary -- needed for registering the performance later
        self._suggested_configs[orig_configuration] = target_configuration

        return pd.DataFrame(
            [list(orig_configuration.values())], columns=list(orig_configuration.keys())
        )
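
    # Illustrative round-trip sketch (hypothetical values; assumes an `adapter`
    # built with num_low_dims=2 and max_unique_values_per_param=None):
    #
    #   low_df = pd.DataFrame([{"dim_0": 0.25, "dim_1": -0.8}])
    #   high_df = adapter.transform(low_df)  # single row in the original space
    #   low_df_again = adapter.inverse_transform(high_df)  # recovered via the
    #   # suggested-configs cache above, not via an analytic inverse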

    def _construct_low_dim_space(
        self,
        num_low_dims: int,
        max_unique_values_per_param: Optional[int],
    ) -> None:
        """
        Constructs the low-dimensional (potentially discretized) parameter search space.

        Parameters
        ----------
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.

        max_unique_values_per_param : Optional[int]
            Number of unique values per parameter. Used to discretize the parameter
            space. If `None`, space discretization is disabled.
        """
        # Define target space parameters
        q_scaler = None
        hyperparameters: List[
            Union[ConfigSpace.UniformFloatHyperparameter, ConfigSpace.UniformIntegerHyperparameter]
        ]
        if max_unique_values_per_param is None:
            hyperparameters = [
                ConfigSpace.UniformFloatHyperparameter(name=f"dim_{idx}", lower=-1, upper=1)
                for idx in range(num_low_dims)
            ]
        else:
            # Currently supported optimizers do not support defining a discretized
            # space (like ConfigSpace does using the `q` kwarg).
            # Thus, to support space discretization, we define the low-dimensional
            # space using integer hyperparameters.
            # We also employ a scaler, which scales suggested values to the [-1, 1]
            # range used by HeSBO projection.
            hyperparameters = [
                ConfigSpace.UniformIntegerHyperparameter(
                    name=f"dim_{idx}",
                    lower=1,
                    upper=max_unique_values_per_param,
                )
                for idx in range(num_low_dims)
            ]

            # Initialize quantized values scaler:
            # from the [1, max_unique_values_per_param] to the (-1, 1) range
            q_scaler = MinMaxScaler(feature_range=(-1, 1))
            ones_vector = np.ones(num_low_dims)
            max_value_vector = ones_vector * max_unique_values_per_param
            q_scaler.fit([ones_vector, max_value_vector])

        self._q_scaler = q_scaler

        # Construct low-dimensional parameter search space
        config_space = ConfigSpace.ConfigurationSpace(name=self.orig_parameter_space.name)
        # use same random state as in original parameter space
        config_space.random = self._random_state
        config_space.add(hyperparameters)
        self._target_config_space = config_space
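
    # Quick sketch of the discretization scaler's behavior (toy values, not
    # part of the class API): fit on [1, 10000] with feature_range=(-1, 1),
    #
    #   q = MinMaxScaler(feature_range=(-1, 1))
    #   q.fit([[1.0], [10000.0]])
    #   q.transform([[1.0]])      # -> [[-1.0]]
    #   q.transform([[5000.5]])   # -> [[0.0]]
    #   q.transform([[10000.0]])  # -> [[1.0]]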

    def _transform(self, configuration: dict) -> dict:
        """
        Projects a low-dimensional point (configuration) to the high-dimensional
        original parameter space, and then biases the resulting parameter values
        towards their special value(s) (if any).

        Parameters
        ----------
        configuration : dict
            Configuration in the low-dimensional space.

        Returns
        -------
        configuration : dict
            Projected configuration in the high-dimensional original search space.
        """
        original_parameters = list(self.orig_parameter_space.values())
        low_dim_config_values = list(configuration.values())

        if self._q_scaler is not None:
            # Scale parameter values from [1, max_value] to [-1, 1]
            low_dim_config_values = self._q_scaler.transform([low_dim_config_values])[0]

        # Project low-dim point to original parameter space
        original_config_values = [
            self._sigma_vector[idx] * low_dim_config_values[self._h_matrix[idx]]
            for idx in range(len(original_parameters))
        ]
        # Scale parameter values to [0, 1]
        original_config_values = self._config_scaler.transform([original_config_values])[0]

        original_config = {}
        for param, norm_value in zip(original_parameters, original_config_values):
            # Clip value to force it to fall in [0, 1]
            # NOTE: HeSBO projection ensures that theoretically, but due to
            # floating point ops nuances this is not always guaranteed
            value = np.clip(norm_value, 0, 1)

            if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                index = int(value * len(param.choices))  # truncate the fractional part
                index = max(0, min(len(param.choices) - 1, index))
                # NOTE: potential rounding here would be unfair to first & last values
                orig_value = param.choices[index]
            elif isinstance(param, NumericalHyperparameter):
                if param.name in self._special_param_values_dict:
                    value = self._special_param_value_scaler(param, value)

                orig_value = param.to_value(value)
                orig_value = np.clip(orig_value, param.lower, param.upper)
            else:
                raise NotImplementedError(
                    "Only Categorical, Integer, and Float hyperparameters are currently supported."
                )

            original_config[param.name] = orig_value

        return original_config
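
    # Worked micro-example of the categorical mapping above (toy values): for
    # choices=("a", "b", "c") and a normalized value of 0.49,
    # index = int(0.49 * 3) = 1 -> "b"; a value of exactly 1.0 would yield
    # index 3, which the min() clamp reduces to the last valid index, 2 -> "c".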

    def _special_param_value_scaler(
        self,
        param: NumericalHyperparameter,
        input_value: float,
    ) -> float:
        """
        Biases the special value(s) of this parameter, by shifting the normalized
        `input_value` towards them.

        Parameters
        ----------
        param : NumericalHyperparameter
            Parameter of the original parameter space.

        input_value : float
            Normalized value for this parameter, as suggested by the underlying optimizer.

        Returns
        -------
        biased_value : float
            Normalized value after special value(s) biasing is applied.
        """
        special_values_list = self._special_param_values_dict[param.name]

        # Check if input value corresponds to some special value
        perc_sum = 0.0
        for special_value, biasing_perc in special_values_list:
            perc_sum += biasing_perc
            if input_value < perc_sum:
                return float(param.to_vector(special_value))

        # Scale input value uniformly to non-special values
        return float(param.to_vector((input_value - perc_sum) / (1 - perc_sum)))
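
    # Worked micro-example of the biasing above (toy values): suppose a
    # parameter has one special value 0 with a 20% bias, i.e.,
    # special_values_list = [(0, 0.2)]. Then input_value=0.1 (< 0.2) maps to
    # the special value 0, while input_value=0.6 is rescaled uniformly to
    # (0.6 - 0.2) / (1 - 0.2) = 0.5 before conversion back to the vector space.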

    # pylint: disable=too-complex,too-many-branches
    def _validate_special_param_values(self, special_param_values_dict: dict) -> None:
        """
        Checks that the user-provided dict of special parameter values is valid, and
        assigns it to the corresponding attribute.

        Parameters
        ----------
        special_param_values_dict : dict
            User-provided dict of special parameter values.

        Raises
        ------
        ValueError: if dictionary key, value, or structure is invalid.
        NotImplementedError: if a special value is defined for a non-integer parameter.
        """
        error_prefix = "Validation of special parameter values dict failed. "

        all_parameters = list(self.orig_parameter_space.keys())
        sanitized_dict = {}

        for param, value in special_param_values_dict.items():
            if param not in all_parameters:
                raise ValueError(error_prefix + f"Parameter '{param}' does not exist.")

            hyperparameter = self.orig_parameter_space[param]
            if not isinstance(hyperparameter, ConfigSpace.UniformIntegerHyperparameter):
                raise NotImplementedError(
                    error_prefix + f"Parameter '{param}' is not supported. "
                    "Only Integer Hyperparameters are currently supported."
                )

            if isinstance(value, int):
                # User specifies a single special value -- default biasing percentage is used
                tuple_list = [(value, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE)]
            elif isinstance(value, tuple) and [type(v) for v in value] == [int, float]:
                # User specifies both special value and biasing percentage
                tuple_list = [value]
            elif isinstance(value, list) and value:
                if all(isinstance(t, int) for t in value):
                    # User specifies a list of special values
                    tuple_list = [
                        (v, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE) for v in value
                    ]
                elif all(
                    isinstance(t, tuple) and [type(v) for v in t] == [int, float] for t in value
                ):
                    # User specifies a list of tuples; each tuple defines the special
                    # value and the biasing percentage
                    tuple_list = value
                else:
                    raise ValueError(
                        error_prefix + f"Invalid format in value list for parameter '{param}'. "
                        "Special value list should contain either integers, "
                        "or (special value, biasing %) tuples."
                    )
            else:
                raise ValueError(
                    error_prefix + f"Invalid format for parameter '{param}'. Dict value should be "
                    "an int, an (int, float) tuple, a list of integers, "
                    "or a list of (int, float) tuples."
                )

            # Are the user-specified special values valid?
            if not all(hyperparameter.lower <= v <= hyperparameter.upper for v, _ in tuple_list):
                raise ValueError(
                    error_prefix
                    + "One (or more) special values are outside of parameter "
                    + f"'{param}' value domain."
                )
            # Are the user-provided special values unique?
            if len(set(v for v, _ in tuple_list)) != len(tuple_list):
                raise ValueError(
                    error_prefix
                    + "One (or more) special values are defined more than once "
                    + f"for parameter '{param}'."
                )
            # Are the biasing percentages valid?
            if not all(0 < perc < 1 for _, perc in tuple_list):
                raise ValueError(
                    error_prefix
                    + f"One (or more) biasing percentages for parameter '{param}' are invalid: "
                    "i.e., they fall outside the (0, 1) range."
                )

            total_percentage = sum(perc for _, perc in tuple_list)
            if total_percentage >= 1.0:
                raise ValueError(
                    error_prefix
                    + f"Total special values percentage for parameter '{param}' surpasses 100%."
                )
            # ... and reasonable?
            if total_percentage >= 0.5:
                warn(
                    f"Total special values percentage for parameter '{param}' exceeds 50%.",
                    UserWarning,
                )

            sanitized_dict[param] = tuple_list

        self._special_param_values_dict = sanitized_dict
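
    # Illustrative examples of accepted `special_param_values` formats (the
    # parameter names and values below are made up):
    #
    #   {"vm.swappiness": 0}                # single value, default 20% bias
    #   {"vm.swappiness": (0, 0.3)}         # single value, explicit 30% bias
    #   {"shared_buffers": [128, 1024]}     # two values, default bias each
    #   {"shared_buffers": [(128, 0.1), (1024, 0.2)]}  # explicit bias per value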

    def _try_generate_approx_inverse_mapping(self) -> None:
        """Tries to generate an approximate reverse mapping:
        i.e., from the high-dimensional space to the low-dimensional one.

        The reverse mapping is generated using the pseudo-inverse of the original
        HeSBO projection matrix.
        This mapping can potentially be used to register configurations that were
        *not* previously suggested by the optimizer.

        NOTE: This method is experimental, and there is currently no guarantee that
        it works as expected.

        Raises
        ------
        RuntimeError: if reverse mapping computation fails.
        """
        from scipy.linalg import (  # pylint: disable=import-outside-toplevel
            LinAlgError,
            pinv,
        )

        warn(
            (
                "Trying to register a configuration that was not "
                "previously suggested by the optimizer.\n"
                "This inverse configuration transformation is typically not supported.\n"
                "However, we will try to register this configuration "
                "using an *experimental* method."
            ),
            UserWarning,
        )

        orig_space_num_dims = len(list(self.orig_parameter_space.values()))
        target_space_num_dims = len(list(self.target_parameter_space.values()))

        # Construct the dense projection matrix from its sparse representation
        proj_matrix = np.zeros(shape=(orig_space_num_dims, target_space_num_dims))
        for row, col in enumerate(self._h_matrix):
            proj_matrix[row][col] = self._sigma_vector[row]

        # Compute the pseudo-inverse matrix
        try:
            self._pinv_matrix = pinv(proj_matrix)
        except LinAlgError as err:
            raise RuntimeError(
                f"Unable to generate reverse mapping using pseudo-inverse matrix: {repr(err)}"
            ) from err
        assert self._pinv_matrix.shape == (target_space_num_dims, orig_space_num_dims)
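
# Worked micro-example of the reverse mapping above (toy dimensions,
# illustrative only): with 3 original dims, 2 low dims, h_matrix=[0, 1, 0],
# and sigma_vector=[1, -1, 1], the dense projection matrix is
#
#   P = [[ 1,  0],
#        [ 0, -1],
#        [ 1,  0]]
#
# scipy.linalg.pinv(P) then has shape (2, 3); applying it to a (scaled)
# high-dim configuration vector yields a least-squares estimate of the
# low-dim point that could have produced it.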