Coverage for mlos_core/mlos_core/spaces/adapters/llamatune.py: 95%
177 statements
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-14 01:58 +0000
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-14 01:58 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""
6Implementation of LlamaTune space adapter.
8LlamaTune is a technique that transforms the original parameter space into a
9lower-dimensional space to try and improve the sample efficiency of the underlying
10optimizer by making use of the inherent parameter sensitivity correlations in most
11systems.
13See Also: `LlamaTune: Sample-Efficient DBMS Configuration Tuning
14<https://www.microsoft.com/en-us/research/publication/llamatune-sample-efficient-dbms-configuration-tuning>`_.
15"""
16import os
17from typing import Dict, List, Optional, Union
18from warnings import warn
20import ConfigSpace
21import ConfigSpace.exceptions
22import numpy as np
23import numpy.typing as npt
24import pandas as pd
25from ConfigSpace.hyperparameters import NumericalHyperparameter
26from sklearn.preprocessing import MinMaxScaler
28from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
29from mlos_core.util import normalize_config
32class LlamaTuneAdapter(BaseSpaceAdapter): # pylint: disable=too-many-instance-attributes
33 """Implementation of LlamaTune, a set of parameter space transformation techniques,
34 aimed at improving the sample-efficiency of the underlying optimizer.
35 """
37 DEFAULT_NUM_LOW_DIMS = 16
38 """Default number of dimensions in the low-dimensional search space, generated by
39 HeSBO projection.
40 """
42 DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE = 0.2
43 """Default percentage of bias for each special parameter value."""
45 DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM = 10000
46 """Default number of (max) unique values of each parameter, when space
47 discretization is used.
48 """
50 def __init__( # pylint: disable=too-many-arguments
51 self,
52 *,
53 orig_parameter_space: ConfigSpace.ConfigurationSpace,
54 num_low_dims: int = DEFAULT_NUM_LOW_DIMS,
55 special_param_values: Optional[dict] = None,
56 max_unique_values_per_param: Optional[int] = DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM,
57 use_approximate_reverse_mapping: bool = False,
58 ):
59 """
60 Create a space adapter that employs LlamaTune's techniques.
62 Parameters
63 ----------
64 orig_parameter_space : ConfigSpace.ConfigurationSpace
65 The original (user-provided) parameter space to optimize.
66 num_low_dims : int
67 Number of dimensions used in the low-dimensional parameter search space.
68 special_param_values_dict : Optional[dict]
69 Dictionary of special
70 max_unique_values_per_param : Optional[int]
71 Number of unique values per parameter. Used to discretize the parameter space.
72 If `None` space discretization is disabled.
73 """
74 super().__init__(orig_parameter_space=orig_parameter_space)
76 if num_low_dims >= len(orig_parameter_space):
77 raise ValueError(
78 "Number of target config space dimensions should be "
79 "less than those of original config space."
80 )
82 # Validate input special param values dict
83 special_param_values = special_param_values or {}
84 self._validate_special_param_values(special_param_values)
86 # Create low-dimensional parameter search space
87 self._construct_low_dim_space(num_low_dims, max_unique_values_per_param)
89 # Initialize config values scaler: from (-1, 1) to (0, 1) range
90 config_scaler = MinMaxScaler(feature_range=(0, 1))
91 ones_vector = np.ones(len(list(self.orig_parameter_space.values())))
92 config_scaler.fit(np.array([-ones_vector, ones_vector]))
93 self._config_scaler = config_scaler
95 # Generate random mapping from low-dimensional space to original config space
96 num_orig_dims = len(list(self.orig_parameter_space.values()))
97 self._h_matrix = self._random_state.choice(range(num_low_dims), num_orig_dims)
98 self._sigma_vector = self._random_state.choice([-1, 1], num_orig_dims)
100 # Used to retrieve the low-dim point, given the high-dim one
101 self._suggested_configs: Dict[ConfigSpace.Configuration, ConfigSpace.Configuration] = {}
102 self._pinv_matrix: npt.NDArray
103 self._use_approximate_reverse_mapping = use_approximate_reverse_mapping
105 @property
106 def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace:
107 """Get the parameter space, which is explored by the underlying optimizer."""
108 return self._target_config_space
110 def inverse_transform(self, configuration: pd.Series) -> pd.Series:
111 config = ConfigSpace.Configuration(
112 self.orig_parameter_space,
113 values=configuration.dropna().to_dict(),
114 )
116 target_config = self._suggested_configs.get(config, None)
117 # NOTE: HeSBO is a non-linear projection method, and does not inherently
118 # support inverse projection
119 # To (partly) support this operation, we keep track of the suggested
120 # low-dim point(s) along with the respective high-dim point; this way we
121 # can retrieve the low-dim point, from its high-dim counterpart.
122 if target_config is None:
123 # Inherently it is not supported to register points, which were not
124 # suggested by the optimizer.
125 if config == self.orig_parameter_space.get_default_configuration():
126 # Default configuration should always be registerable.
127 pass
128 elif not self._use_approximate_reverse_mapping:
129 raise ValueError(
130 f"{repr(config)}\n"
131 "The above configuration was not suggested by the optimizer. "
132 "Approximate reverse mapping is currently disabled; "
133 "thus *only* configurations suggested "
134 "previously by the optimizer can be registered."
135 )
137 target_config = self._try_inverse_transform_config(config)
139 return pd.Series(target_config, index=list(self.target_parameter_space.keys()))
141 def _try_inverse_transform_config(
142 self,
143 config: ConfigSpace.Configuration,
144 ) -> ConfigSpace.Configuration:
145 """
146 Attempts to generate an inverse mapping of the given configuration that wasn't
147 previously registered.
149 Parameters
150 ----------
151 configuration : ConfigSpace.Configuration
152 Configuration in the original high-dimensional space.
154 Returns
155 -------
156 ConfigSpace.Configuration
157 Configuration in the low-dimensional space.
159 Raises
160 ------
161 ValueError
162 On conversion errors.
163 """
164 # ...yet, we try to support that by implementing an approximate
165 # reverse mapping using pseudo-inverse matrix.
166 if getattr(self, "_pinv_matrix", None) is None:
167 self._try_generate_approx_inverse_mapping()
169 # Replace NaNs with zeros for inactive hyperparameters
170 config_vector = np.nan_to_num(config.get_array(), nan=0.0)
171 # Perform approximate reverse mapping
172 # NOTE: applying special value biasing is not possible
173 vector: npt.NDArray = self._config_scaler.inverse_transform(np.array([config_vector]))[0]
174 target_config_vector: npt.NDArray = self._pinv_matrix.dot(vector)
175 # Clip values to to [-1, 1] range of the low dimensional space.
176 for idx, value in enumerate(target_config_vector):
177 target_config_vector[idx] = np.clip(value, -1, 1)
178 if self._q_scaler is not None:
179 # If the max_unique_values_per_param is set, we need to scale
180 # the low dimension space back to the discretized space as well.
181 target_config_vector = self._q_scaler.inverse_transform(
182 np.array([target_config_vector])
183 )[0]
184 assert isinstance(target_config_vector, np.ndarray)
185 # Clip values to [1, max_value] range (floating point errors may occur).
186 for idx, value in enumerate(target_config_vector):
187 target_config_vector[idx] = int(np.clip(value, 1, self._q_scaler.data_max_[idx]))
188 target_config_vector = target_config_vector.astype(int)
189 # Convert the vector to a dictionary.
190 target_config_dict = dict(
191 zip(
192 self.target_parameter_space.keys(),
193 target_config_vector,
194 )
195 )
196 target_config = ConfigSpace.Configuration(
197 self.target_parameter_space,
198 values=target_config_dict,
199 # This method results in hyperparameter type conversion issues
200 # (e.g., float instead of int), so we use the values dict instead.
201 # vector=target_config_vector,
202 )
204 # Check to see if the approximate reverse mapping looks OK.
205 # Note: we know this isn't 100% accurate, so this is just a warning and
206 # mostly meant for internal debugging.
207 configuration_dict = dict(config)
208 double_checked_config = self._transform(dict(target_config))
209 double_checked_config = {
210 # Skip the special values that aren't in the original space.
211 k: v
212 for k, v in double_checked_config.items()
213 if k in configuration_dict
214 }
215 if double_checked_config != configuration_dict and (
216 os.environ.get("MLOS_DEBUG", "false").lower() in {"1", "true", "y", "yes"}
217 ):
218 warn(
219 (
220 f"Note: Configuration {configuration_dict} was inverse transformed to "
221 f"{dict(target_config)} and then back to {double_checked_config}. "
222 "This is an approximate reverse mapping for previously unregistered "
223 "configurations, so this is just a warning."
224 ),
225 UserWarning,
226 )
228 # But the inverse mapping should at least be valid in the target space.
229 try:
230 ConfigSpace.Configuration(
231 self.target_parameter_space,
232 values=target_config,
233 ).check_valid_configuration()
234 except ConfigSpace.exceptions.IllegalValueError as err:
235 raise ValueError(
236 f"Invalid configuration {target_config} generated by "
237 f"inverse mapping of {config}:\n{err}"
238 ) from err
240 return target_config
242 def transform(self, configuration: pd.Series) -> pd.Series:
243 target_values_dict = configuration.to_dict()
244 target_configuration = ConfigSpace.Configuration(
245 self.target_parameter_space,
246 values=target_values_dict,
247 )
249 orig_values_dict = self._transform(target_values_dict)
250 orig_configuration = normalize_config(self.orig_parameter_space, orig_values_dict)
252 # Validate that the configuration is in the original space.
253 try:
254 ConfigSpace.Configuration(
255 self.orig_parameter_space,
256 values=orig_configuration,
257 ).check_valid_configuration()
258 except ConfigSpace.exceptions.IllegalValueError as err:
259 raise ValueError(
260 f"Invalid configuration {orig_configuration} generated by "
261 f"transformation of {target_configuration}:\n{err}"
262 ) from err
264 # Add to inverse dictionary -- needed for registering the performance later
265 self._suggested_configs[orig_configuration] = target_configuration
267 ret: pd.Series = pd.Series(
268 list(orig_configuration.values()), index=list(orig_configuration.keys())
269 )
270 return ret
272 def _construct_low_dim_space(
273 self,
274 num_low_dims: int,
275 max_unique_values_per_param: Optional[int],
276 ) -> None:
277 """
278 Constructs the low-dimensional parameter (potentially discretized) search space.
280 Parameters
281 ----------
282 num_low_dims : int
283 Number of dimensions used in the low-dimensional parameter search space.
285 max_unique_values_per_param: Optional[int]:
286 Number of unique values per parameter. Used to discretize the parameter space.
287 If `None` space discretization is disabled.
288 """
289 # Define target space parameters
290 q_scaler = None
291 hyperparameters: List[
292 Union[ConfigSpace.UniformFloatHyperparameter, ConfigSpace.UniformIntegerHyperparameter]
293 ]
294 if max_unique_values_per_param is None:
295 hyperparameters = [
296 ConfigSpace.UniformFloatHyperparameter(name=f"dim_{idx}", lower=-1, upper=1)
297 for idx in range(num_low_dims)
298 ]
299 else:
300 # Currently supported optimizers do not support defining a discretized
301 # space (like ConfigSpace does using `q` kwarg).
302 # Thus, to support space discretization, we define the low-dimensional
303 # space using integer hyperparameters.
304 # We also employ a scaler, which scales suggested values to [-1, 1]
305 # range, used by HeSBO projection.
306 hyperparameters = [
307 ConfigSpace.UniformIntegerHyperparameter(
308 name=f"dim_{idx}",
309 lower=1,
310 upper=max_unique_values_per_param,
311 )
312 for idx in range(num_low_dims)
313 ]
315 # Initialize quantized values scaler:
316 # from [0, max_unique_values_per_param] to (-1, 1) range
317 q_scaler = MinMaxScaler(feature_range=(-1, 1))
318 ones_vector = np.ones(num_low_dims)
319 max_value_vector = ones_vector * max_unique_values_per_param
320 q_scaler.fit(np.array([ones_vector, max_value_vector]))
322 self._q_scaler = q_scaler
324 # Construct low-dimensional parameter search space
325 config_space = ConfigSpace.ConfigurationSpace(name=self.orig_parameter_space.name)
326 # use same random state as in original parameter space
327 config_space.random = self._random_state
328 config_space.add(hyperparameters)
329 self._target_config_space = config_space
331 def _transform(self, configuration: dict) -> dict:
332 """
333 Projects a low-dimensional point (configuration) to the high-dimensional
334 original parameter space, and then biases the resulting parameter values towards
335 their special value(s) (if any).
337 Parameters
338 ----------
339 configuration : dict
340 Configuration in the low-dimensional space.
342 Returns
343 -------
344 configuration : dict
345 Projected configuration in the high-dimensional original search space.
346 """
347 original_parameters = list(self.orig_parameter_space.values())
348 low_dim_config_values = list(configuration.values())
350 if self._q_scaler is not None:
351 # Scale parameter values from [1, max_value] to [-1, 1]
352 low_dim_config_values = self._q_scaler.transform(np.array([low_dim_config_values]))[0]
354 # Project low-dim point to original parameter space
355 original_config_values = [
356 self._sigma_vector[idx] * low_dim_config_values[self._h_matrix[idx]]
357 for idx in range(len(original_parameters))
358 ]
359 # Scale parameter values to [0, 1]
360 original_config_values = self._config_scaler.transform(np.array([original_config_values]))[
361 0
362 ]
364 original_config = {}
365 for param, norm_value in zip(original_parameters, original_config_values):
366 # Clip value to force it to fall in [0, 1]
367 # NOTE: HeSBO projection ensures that theoretically but due to
368 # floating point ops nuances this is not always guaranteed
369 value = np.clip(norm_value, 0, 1)
371 if isinstance(param, ConfigSpace.CategoricalHyperparameter):
372 index = int(value * len(param.choices)) # truncate integer part
373 index = max(0, min(len(param.choices) - 1, index))
374 # NOTE: potential rounding here would be unfair to first & last values
375 orig_value = param.choices[index]
376 elif isinstance(param, NumericalHyperparameter):
377 if param.name in self._special_param_values_dict:
378 value = self._special_param_value_scaler(param, value)
380 orig_value = param.to_value(value)
381 orig_value = np.clip(orig_value, param.lower, param.upper)
382 else:
383 raise NotImplementedError(
384 "Only Categorical, Integer, and Float hyperparameters are currently supported."
385 )
387 original_config[param.name] = orig_value
389 return original_config
391 def _special_param_value_scaler(
392 self,
393 param: NumericalHyperparameter,
394 input_value: float,
395 ) -> float:
396 """
397 Biases the special value(s) of this parameter, by shifting the normalized
398 `input_value` towards those.
400 Parameters
401 ----------
402 param: NumericalHyperparameter
403 Parameter of the original parameter space.
405 input_value: float
406 Normalized value for this parameter, as suggested by the underlying optimizer.
408 Returns
409 -------
410 biased_value: float
411 Normalized value after special value(s) biasing is applied.
412 """
413 special_values_list = self._special_param_values_dict[param.name]
415 # Check if input value corresponds to some special value
416 perc_sum = 0.0
417 for special_value, biasing_perc in special_values_list:
418 perc_sum += biasing_perc
419 if input_value < perc_sum:
420 return float(param.to_vector(special_value))
422 # Scale input value uniformly to non-special values
423 return float(param.to_vector((input_value - perc_sum) / (1 - perc_sum)))
425 # pylint: disable=too-complex,too-many-branches
426 def _validate_special_param_values(self, special_param_values_dict: dict) -> None:
427 """
428 Checks that the user-provided dict of special parameter values is valid. And
429 assigns it to the corresponding attribute.
431 Parameters
432 ----------
433 special_param_values_dict: dict
434 User-provided dict of special parameter values.
436 Raises
437 ------
438 ValueError: if dictionary key, valid, or structure is invalid.
439 NotImplementedError: if special value is defined for a non-integer parameter
440 """
441 error_prefix = "Validation of special parameter values dict failed."
443 all_parameters = list(self.orig_parameter_space.keys())
444 sanitized_dict = {}
446 for param, value in special_param_values_dict.items():
447 if param not in all_parameters:
448 raise ValueError(error_prefix + f"Parameter '{param}' does not exist.")
450 hyperparameter = self.orig_parameter_space[param]
451 if not isinstance(hyperparameter, ConfigSpace.UniformIntegerHyperparameter):
452 raise NotImplementedError(
453 error_prefix + f"Parameter '{param}' is not supported. "
454 "Only Integer Hyperparameters are currently supported."
455 )
457 if isinstance(value, int):
458 # User specifies a single special value -- default biasing percentage is used
459 tuple_list = [(value, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE)]
460 elif isinstance(value, tuple) and [type(v) for v in value] == [int, float]:
461 # User specifies both special value and biasing percentage
462 tuple_list = [value]
463 elif isinstance(value, list) and value:
464 if all(isinstance(t, int) for t in value):
465 # User specifies list of special values
466 tuple_list = [
467 (v, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE) for v in value
468 ]
469 elif all(
470 isinstance(t, tuple) and [type(v) for v in t] == [int, float] for t in value
471 ):
472 # User specifies list of tuples; each tuple defines the special
473 # value and the biasing percentage
474 tuple_list = value
475 else:
476 raise ValueError(
477 error_prefix + f"Invalid format in value list for parameter '{param}'. "
478 f"Special value list should contain either integers, "
479 "or (special value, biasing %) tuples."
480 )
481 else:
482 raise ValueError(
483 error_prefix + f"Invalid format for parameter '{param}'. Dict value should be "
484 "an int, a (int, float) tuple, a list of integers, "
485 "or a list of (int, float) tuples."
486 )
488 # Are user-specified special values valid?
489 if not all(hyperparameter.lower <= v <= hyperparameter.upper for v, _ in tuple_list):
490 raise ValueError(
491 error_prefix
492 + "One (or more) special values are outside of parameter "
493 + f"'{param}' value domain."
494 )
495 # Are user-provided special values unique?
496 if len(set(v for v, _ in tuple_list)) != len(tuple_list):
497 raise ValueError(
498 error_prefix
499 + "One (or more) special values are defined more than once "
500 + f"for parameter '{param}'."
501 )
502 # Are biasing percentages valid?
503 if not all(0 < perc < 1 for _, perc in tuple_list):
504 raise ValueError(
505 error_prefix
506 + f"One (or more) biasing percentages for parameter '{param}' are invalid: "
507 "i.e., fall outside (0, 1) range."
508 )
510 total_percentage = sum(perc for _, perc in tuple_list)
511 if total_percentage >= 1.0:
512 raise ValueError(
513 error_prefix
514 + f"Total special values percentage for parameter '{param}' surpass 100%."
515 )
516 # ... and reasonable?
517 if total_percentage >= 0.5:
518 warn(
519 f"Total special values percentage for parameter '{param}' exceeds 50%.",
520 UserWarning,
521 )
523 sanitized_dict[param] = tuple_list
525 self._special_param_values_dict = sanitized_dict
527 def _try_generate_approx_inverse_mapping(self) -> None:
528 """Tries to generate an approximate reverse mapping:
529 i.e., from high-dimensional space to the low-dimensional one.
531 Reverse mapping is generated using the pseudo-inverse matrix, of original
532 HeSBO projection matrix.
533 This mapping can be potentially used to register configurations that were
534 *not* previously suggested by the optimizer.
536 NOTE: This method is experimental, and there is currently no guarantee that
537 it works as expected.
539 Raises
540 ------
541 RuntimeError: if reverse mapping computation fails.
542 """
543 from scipy.linalg import ( # pylint: disable=import-outside-toplevel
544 LinAlgError,
545 pinv,
546 )
548 warn(
549 (
550 "Trying to register a configuration that was not "
551 "previously suggested by the optimizer.\n"
552 "This inverse configuration transformation is typically not supported.\n"
553 "However, we will try to register this configuration "
554 "using an *experimental* method."
555 ),
556 UserWarning,
557 )
559 orig_space_num_dims = len(list(self.orig_parameter_space.values()))
560 target_space_num_dims = len(list(self.target_parameter_space.values()))
562 # Construct dense projection matrix from sparse repr
563 proj_matrix = np.zeros(shape=(orig_space_num_dims, target_space_num_dims))
564 for row, col in enumerate(self._h_matrix):
565 proj_matrix[row][col] = self._sigma_vector[row]
567 # Compute pseudo-inverse matrix
568 try:
569 inv_matrix: npt.NDArray = pinv(proj_matrix)
570 self._pinv_matrix = inv_matrix
571 except LinAlgError as err:
572 raise RuntimeError(
573 f"Unable to generate reverse mapping using pseudo-inverse matrix: {repr(err)}"
574 ) from err
575 assert self._pinv_matrix.shape == (target_space_num_dims, orig_space_num_dims)