Coverage for mlos_core/mlos_core/spaces/adapters/llamatune.py: 95% (180 statements)
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Implementation of LlamaTune space adapter."""
import os
from typing import Dict, List, Optional, Union
from warnings import warn

import ConfigSpace
import ConfigSpace.exceptions
import numpy as np
import numpy.typing as npt
import pandas as pd
from ConfigSpace.hyperparameters import NumericalHyperparameter
from sklearn.preprocessing import MinMaxScaler

from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
from mlos_core.util import drop_nulls, normalize_config


class LlamaTuneAdapter(BaseSpaceAdapter):  # pylint: disable=too-many-instance-attributes
    """Implementation of LlamaTune, a set of parameter space transformation techniques,
    aimed at improving the sample-efficiency of the underlying optimizer.
    """

    DEFAULT_NUM_LOW_DIMS = 16
    """Default number of dimensions in the low-dimensional search space, generated by
    HeSBO projection.
    """

    DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE = 0.2
    """Default percentage of bias for each special parameter value."""

    DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM = 10000
    """Default maximum number of unique values per parameter, when space
    discretization is used.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        orig_parameter_space: ConfigSpace.ConfigurationSpace,
        num_low_dims: int = DEFAULT_NUM_LOW_DIMS,
        special_param_values: Optional[dict] = None,
        max_unique_values_per_param: Optional[int] = DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM,
        use_approximate_reverse_mapping: bool = False,
    ):
49 """
50 Create a space adapter that employs LlamaTune's techniques.
52 Parameters
53 ----------
54 orig_parameter_space : ConfigSpace.ConfigurationSpace
55 The original (user-provided) parameter space to optimize.
56 num_low_dims: int
57 Number of dimensions used in the low-dimensional parameter search space.
58 special_param_values_dict: Optional[dict]
59 Dictionary of special
60 max_unique_values_per_param: Optional[int]:
61 Number of unique values per parameter. Used to discretize the parameter space.
62 If `None` space discretization is disabled.
63 """
        super().__init__(orig_parameter_space=orig_parameter_space)

        if num_low_dims >= len(orig_parameter_space):
            raise ValueError(
                "Number of target config space dimensions should be "
                "less than those of original config space."
            )

        # Validate input special param values dict
        special_param_values = special_param_values or {}
        self._validate_special_param_values(special_param_values)

        # Create low-dimensional parameter search space
        self._construct_low_dim_space(num_low_dims, max_unique_values_per_param)

        # Initialize config values scaler: from (-1, 1) to (0, 1) range
        config_scaler = MinMaxScaler(feature_range=(0, 1))
        ones_vector = np.ones(len(list(self.orig_parameter_space.values())))
        config_scaler.fit([-ones_vector, ones_vector])
        self._config_scaler = config_scaler

        # Generate random mapping from low-dimensional space to original config space
        num_orig_dims = len(list(self.orig_parameter_space.values()))
        self._h_matrix = self._random_state.choice(range(num_low_dims), num_orig_dims)
        self._sigma_vector = self._random_state.choice([-1, 1], num_orig_dims)
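        # NOTE: This is the sparse HeSBO embedding: each original dimension `idx` is
        # assigned a single low dimension (self._h_matrix[idx]) and a random sign
        # (self._sigma_vector[idx]), i.e., the dense projection matrix has exactly one
        # +/-1 entry per row (see _try_generate_approx_inverse_mapping below).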

        # Used to retrieve the low-dim point, given the high-dim one
        self._suggested_configs: Dict[ConfigSpace.Configuration, ConfigSpace.Configuration] = {}
        self._pinv_matrix: npt.NDArray
        self._use_approximate_reverse_mapping = use_approximate_reverse_mapping

    @property
    def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace:
        """Get the parameter space, which is explored by the underlying optimizer."""
        return self._target_config_space

    def inverse_transform(self, configurations: pd.DataFrame) -> pd.DataFrame:
        target_configurations = []
        for _, config in configurations.astype("O").iterrows():
            configuration = ConfigSpace.Configuration(
                self.orig_parameter_space,
                values=drop_nulls(config.to_dict()),
            )

            target_config = self._suggested_configs.get(configuration, None)
            # NOTE: HeSBO is a non-linear projection method, and does not inherently
            # support inverse projection.
            # To (partly) support this operation, we keep track of the suggested
            # low-dim point(s) along with the respective high-dim point; this way we
            # can retrieve the low-dim point from its high-dim counterpart.
            if target_config is None:
                # Inherently it is not supported to register points which were not
                # suggested by the optimizer.
                if configuration == self.orig_parameter_space.get_default_configuration():
                    # Default configuration should always be registerable.
                    pass
                elif not self._use_approximate_reverse_mapping:
                    raise ValueError(
                        f"{repr(configuration)}\n"
                        "The above configuration was not suggested by the optimizer. "
                        "Approximate reverse mapping is currently disabled; "
                        "thus *only* configurations suggested "
                        "previously by the optimizer can be registered."
                    )
                # else ...
                target_config = self._try_inverse_transform_config(configuration)

            target_configurations.append(target_config)

        return pd.DataFrame(
            target_configurations,
            columns=list(self.target_parameter_space.keys()),
        )

    def _try_inverse_transform_config(
        self,
        config: ConfigSpace.Configuration,
    ) -> ConfigSpace.Configuration:
        """
        Attempts to generate an inverse mapping of the given configuration that wasn't
        previously registered.

        Parameters
        ----------
        config : ConfigSpace.Configuration
            Configuration in the original high-dimensional space.

        Returns
        -------
        ConfigSpace.Configuration
            Configuration in the low-dimensional space.

        Raises
        ------
        ValueError
            On conversion errors.
        """
        # ...yet, we try to support that by implementing an approximate
        # reverse mapping using the pseudo-inverse matrix.
        if getattr(self, "_pinv_matrix", None) is None:
            self._try_generate_approx_inverse_mapping()

        # Replace NaNs with zeros for inactive hyperparameters
        config_vector = np.nan_to_num(config.get_array(), nan=0.0)
        # Perform approximate reverse mapping
        # NOTE: applying special value biasing is not possible
        vector: npt.NDArray = self._config_scaler.inverse_transform([config_vector])[0]
        target_config_vector: npt.NDArray = self._pinv_matrix.dot(vector)
        # Clip values to the [-1, 1] range of the low-dimensional space.
        for idx, value in enumerate(target_config_vector):
            target_config_vector[idx] = np.clip(value, -1, 1)
        if self._q_scaler is not None:
            # If max_unique_values_per_param is set, we need to scale the
            # low-dimensional space back to the discretized space as well.
            target_config_vector = self._q_scaler.inverse_transform([target_config_vector])[0]
            assert isinstance(target_config_vector, np.ndarray)
            # Clip values to [1, max_value] range (floating point errors may occur).
            for idx, value in enumerate(target_config_vector):
                target_config_vector[idx] = int(np.clip(value, 1, self._q_scaler.data_max_[idx]))
            target_config_vector = target_config_vector.astype(int)
        # Convert the vector to a dictionary.
        target_config_dict = dict(
            zip(
                self.target_parameter_space.keys(),
                target_config_vector,
            )
        )
        target_config = ConfigSpace.Configuration(
            self.target_parameter_space,
            values=target_config_dict,
            # This method results in hyperparameter type conversion issues
            # (e.g., float instead of int), so we use the values dict instead.
            # vector=target_config_vector,
        )

        # Check to see if the approximate reverse mapping looks OK.
        # Note: we know this isn't 100% accurate, so this is just a warning and
        # mostly meant for internal debugging.
        configuration_dict = dict(config)
        double_checked_config = self._transform(dict(target_config))
        double_checked_config = {
            # Skip the special values that aren't in the original space.
            k: v
            for k, v in double_checked_config.items()
            if k in configuration_dict
        }
        if double_checked_config != configuration_dict and (
            os.environ.get("MLOS_DEBUG", "false").lower() in {"1", "true", "y", "yes"}
        ):
            warn(
                (
                    f"Note: Configuration {configuration_dict} was inverse transformed to "
                    f"{dict(target_config)} and then back to {double_checked_config}. "
                    "This is an approximate reverse mapping for previously unregistered "
                    "configurations, so this is just a warning."
                ),
                UserWarning,
            )

        # But the inverse mapping should at least be valid in the target space.
        try:
            ConfigSpace.Configuration(
                self.target_parameter_space,
                values=target_config,
            ).check_valid_configuration()
        except ConfigSpace.exceptions.IllegalValueError as e:
            raise ValueError(
                f"Invalid configuration {target_config} generated by "
                f"inverse mapping of {config}:\n{e}"
            ) from e

        return target_config

    def transform(self, configuration: pd.DataFrame) -> pd.DataFrame:
        if len(configuration) != 1:
            raise ValueError(
                "Configuration dataframe must contain exactly 1 row. "
                f"Found {len(configuration)} rows."
            )

        target_values_dict = configuration.iloc[0].to_dict()
        target_configuration = ConfigSpace.Configuration(
            self.target_parameter_space,
            values=target_values_dict,
        )

        orig_values_dict = self._transform(target_values_dict)
        orig_configuration = normalize_config(self.orig_parameter_space, orig_values_dict)

        # Validate that the configuration is in the original space.
        try:
            ConfigSpace.Configuration(
                self.orig_parameter_space,
                values=orig_configuration,
            ).check_valid_configuration()
        except ConfigSpace.exceptions.IllegalValueError as e:
            raise ValueError(
                f"Invalid configuration {orig_configuration} generated by "
                f"transformation of {target_configuration}:\n{e}"
            ) from e

        # Add to inverse dictionary -- needed for registering the performance later
        self._suggested_configs[orig_configuration] = target_configuration

        return pd.DataFrame(
            [list(orig_configuration.values())], columns=list(orig_configuration.keys())
        )

    def _construct_low_dim_space(
        self,
        num_low_dims: int,
        max_unique_values_per_param: Optional[int],
    ) -> None:
        """
        Constructs the low-dimensional parameter (potentially discretized) search space.

        Parameters
        ----------
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.

        max_unique_values_per_param : Optional[int]
            Number of unique values per parameter. Used to discretize the parameter space.
            If `None`, space discretization is disabled.
        """
        # Define target space parameters
        q_scaler = None
        hyperparameters: List[
            Union[ConfigSpace.UniformFloatHyperparameter, ConfigSpace.UniformIntegerHyperparameter]
        ]
        if max_unique_values_per_param is None:
            hyperparameters = [
                ConfigSpace.UniformFloatHyperparameter(name=f"dim_{idx}", lower=-1, upper=1)
                for idx in range(num_low_dims)
            ]
        else:
            # Currently supported optimizers do not support defining a discretized
            # space (like ConfigSpace does using `q` kwarg).
            # Thus, to support space discretization, we define the low-dimensional
            # space using integer hyperparameters.
            # We also employ a scaler, which scales suggested values to [-1, 1]
            # range, used by HeSBO projection.
            hyperparameters = [
                ConfigSpace.UniformIntegerHyperparameter(
                    name=f"dim_{idx}",
                    lower=1,
                    upper=max_unique_values_per_param,
                )
                for idx in range(num_low_dims)
            ]

            # Initialize quantized values scaler:
            # from [1, max_unique_values_per_param] to (-1, 1) range
            q_scaler = MinMaxScaler(feature_range=(-1, 1))
            ones_vector = np.ones(num_low_dims)
            max_value_vector = ones_vector * max_unique_values_per_param
            q_scaler.fit([ones_vector, max_value_vector])

        self._q_scaler = q_scaler

        # Construct low-dimensional parameter search space
        config_space = ConfigSpace.ConfigurationSpace(name=self.orig_parameter_space.name)
        # use same random state as in original parameter space
        config_space.random = self._random_state
        config_space.add(hyperparameters)
        self._target_config_space = config_space

    def _transform(self, configuration: dict) -> dict:
        """
        Projects a low-dimensional point (configuration) to the high-dimensional
        original parameter space, and then biases the resulting parameter values towards
        their special value(s) (if any).

        Parameters
        ----------
        configuration : dict
            Configuration in the low-dimensional space.

        Returns
        -------
        configuration : dict
            Projected configuration in the high-dimensional original search space.
        """
        original_parameters = list(self.orig_parameter_space.values())
        low_dim_config_values = list(configuration.values())

        if self._q_scaler is not None:
            # Scale parameter values from [1, max_value] to [-1, 1]
            low_dim_config_values = self._q_scaler.transform([low_dim_config_values])[0]

        # Project low-dim point to original parameter space
        original_config_values = [
            self._sigma_vector[idx] * low_dim_config_values[self._h_matrix[idx]]
            for idx in range(len(original_parameters))
        ]
        # Scale parameter values to [0, 1]
        original_config_values = self._config_scaler.transform([original_config_values])[0]

        original_config = {}
        for param, norm_value in zip(original_parameters, original_config_values):
            # Clip value to force it to fall in [0, 1]
            # NOTE: HeSBO projection ensures this theoretically, but floating-point
            # rounding means it is not always guaranteed in practice.
            value = np.clip(norm_value, 0, 1)

            if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                index = int(value * len(param.choices))  # truncate integer part
                index = max(0, min(len(param.choices) - 1, index))
                # NOTE: potential rounding here would be unfair to first & last values
                orig_value = param.choices[index]
            elif isinstance(param, NumericalHyperparameter):
                if param.name in self._special_param_values_dict:
                    value = self._special_param_value_scaler(param, value)

                orig_value = param.to_value(value)
                orig_value = np.clip(orig_value, param.lower, param.upper)
            else:
                raise NotImplementedError(
                    "Only Categorical, Integer, and Float hyperparameters are currently supported."
                )

            original_config[param.name] = orig_value

        return original_config

    def _special_param_value_scaler(
        self,
        param: NumericalHyperparameter,
        input_value: float,
    ) -> float:
        """
        Biases the special value(s) of this parameter, by shifting the normalized
        `input_value` towards them.

        Parameters
        ----------
        param : NumericalHyperparameter
            Parameter of the original parameter space.

        input_value : float
            Normalized value for this parameter, as suggested by the underlying optimizer.

        Returns
        -------
        biased_value : float
            Normalized value after special value(s) biasing is applied.
        """
        special_values_list = self._special_param_values_dict[param.name]

        # Check if input value corresponds to some special value
        perc_sum = 0.0
        for special_value, biasing_perc in special_values_list:
            perc_sum += biasing_perc
            if input_value < perc_sum:
                return float(param.to_vector(special_value))

        # Scale input value uniformly to non-special values
        return float(param.to_vector((input_value - perc_sum) / (1 - perc_sum)))

    # pylint: disable=too-complex,too-many-branches
    def _validate_special_param_values(self, special_param_values_dict: dict) -> None:
        """
        Checks that the user-provided dict of special parameter values is valid, and
        assigns it to the corresponding attribute.

        Parameters
        ----------
        special_param_values_dict : dict
            User-provided dict of special parameter values.

        Raises
        ------
        ValueError: if dictionary key, value, or structure is invalid.
        NotImplementedError: if a special value is defined for a non-integer parameter.
        """
        error_prefix = "Validation of special parameter values dict failed."

        all_parameters = list(self.orig_parameter_space.keys())
        sanitized_dict = {}

        for param, value in special_param_values_dict.items():
            if param not in all_parameters:
                raise ValueError(error_prefix + f"Parameter '{param}' does not exist.")

            hyperparameter = self.orig_parameter_space[param]
            if not isinstance(hyperparameter, ConfigSpace.UniformIntegerHyperparameter):
                raise NotImplementedError(
                    error_prefix + f"Parameter '{param}' is not supported. "
                    "Only Integer Hyperparameters are currently supported."
                )

            if isinstance(value, int):
                # User specifies a single special value -- default biasing percentage is used
                tuple_list = [(value, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE)]
            elif isinstance(value, tuple) and [type(v) for v in value] == [int, float]:
                # User specifies both special value and biasing percentage
                tuple_list = [value]
            elif isinstance(value, list) and value:
                if all(isinstance(t, int) for t in value):
                    # User specifies list of special values
                    tuple_list = [
                        (v, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE) for v in value
                    ]
                elif all(
                    isinstance(t, tuple) and [type(v) for v in t] == [int, float] for t in value
                ):
                    # User specifies list of tuples; each tuple defines the special
                    # value and the biasing percentage
                    tuple_list = value
                else:
                    raise ValueError(
                        error_prefix + f"Invalid format in value list for parameter '{param}'. "
                        f"Special value list should contain either integers, "
                        "or (special value, biasing %) tuples."
                    )
            else:
                raise ValueError(
                    error_prefix + f"Invalid format for parameter '{param}'. Dict value should be "
                    "an int, a (int, float) tuple, a list of integers, "
                    "or a list of (int, float) tuples."
                )

            # Are user-specified special values valid?
            if not all(hyperparameter.lower <= v <= hyperparameter.upper for v, _ in tuple_list):
                raise ValueError(
                    error_prefix
                    + "One (or more) special values are outside of parameter "
                    + f"'{param}' value domain."
                )
            # Are user-provided special values unique?
            if len(set(v for v, _ in tuple_list)) != len(tuple_list):
                raise ValueError(
                    error_prefix
                    + "One (or more) special values are defined more than once "
                    + f"for parameter '{param}'."
                )
            # Are biasing percentages valid?
            if not all(0 < perc < 1 for _, perc in tuple_list):
                raise ValueError(
                    error_prefix
                    + f"One (or more) biasing percentages for parameter '{param}' are invalid: "
                    "i.e., fall outside (0, 1) range."
                )

            total_percentage = sum(perc for _, perc in tuple_list)
            if total_percentage >= 1.0:
                raise ValueError(
                    error_prefix
                    + f"Total special values percentage for parameter '{param}' surpasses 100%."
                )
            # ... and reasonable?
            if total_percentage >= 0.5:
                warn(
                    f"Total special values percentage for parameter '{param}' exceeds 50%.",
                    UserWarning,
                )

            sanitized_dict[param] = tuple_list

        self._special_param_values_dict = sanitized_dict

    def _try_generate_approx_inverse_mapping(self) -> None:
        """Tries to generate an approximate reverse mapping:
        i.e., from the high-dimensional space to the low-dimensional one.

        The reverse mapping is generated using the pseudo-inverse of the original
        HeSBO projection matrix.
        This mapping can potentially be used to register configurations that were
        *not* previously suggested by the optimizer.

        NOTE: This method is experimental, and there is currently no guarantee that
        it works as expected.

        Raises
        ------
        RuntimeError: if reverse mapping computation fails.
        """
        from scipy.linalg import (  # pylint: disable=import-outside-toplevel
            LinAlgError,
            pinv,
        )

        warn(
            (
                "Trying to register a configuration that was not "
                "previously suggested by the optimizer.\n"
                "This inverse configuration transformation is typically not supported.\n"
                "However, we will try to register this configuration "
                "using an *experimental* method."
            ),
            UserWarning,
        )

        orig_space_num_dims = len(list(self.orig_parameter_space.values()))
        target_space_num_dims = len(list(self.target_parameter_space.values()))

        # Construct dense projection matrix from sparse repr
        proj_matrix = np.zeros(shape=(orig_space_num_dims, target_space_num_dims))
        for row, col in enumerate(self._h_matrix):
            proj_matrix[row][col] = self._sigma_vector[row]
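        # Illustrative example (values are made up): with self._h_matrix == [1, 0, 1]
        # and self._sigma_vector == [-1, 1, 1], the dense matrix is
        #   proj_matrix == [[0, -1], [1, 0], [0, 1]]
        # and the approximate reverse mapping below is x_low ~= pinv(proj_matrix) @ x_high.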

        # Compute pseudo-inverse matrix
        try:
            self._pinv_matrix = pinv(proj_matrix)
        except LinAlgError as err:
            raise RuntimeError(
                f"Unable to generate reverse mapping using pseudo-inverse matrix: {repr(err)}"
            ) from err
        assert self._pinv_matrix.shape == (target_space_num_dims, orig_space_num_dims)