Coverage for mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py: 98%
109 statements
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-14 01:58 +0000
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-14 01:58 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""A wrapper for mlos_core optimizers for mlos_bench."""
7import logging
8import os
9from types import TracebackType
10from typing import Dict, Literal, Optional, Sequence, Tuple, Type, Union
12import pandas as pd
14from mlos_bench.environments.status import Status
15from mlos_bench.optimizers.base_optimizer import Optimizer
16from mlos_bench.optimizers.convert_configspace import (
17 TunableValueKind,
18 configspace_data_to_tunable_values,
19 special_param_names,
20)
21from mlos_bench.services.base_service import Service
22from mlos_bench.tunables.tunable import TunableValue
23from mlos_bench.tunables.tunable_groups import TunableGroups
24from mlos_core.data_classes import Observations
25from mlos_core.optimizers import (
26 DEFAULT_OPTIMIZER_TYPE,
27 BaseOptimizer,
28 OptimizerFactory,
29 OptimizerType,
30 SpaceAdapterType,
31)
33_LOG = logging.getLogger(__name__)
36class MlosCoreOptimizer(Optimizer):
37 """A wrapper class for the mlos_core optimizers."""
39 def __init__(
40 self,
41 tunables: TunableGroups,
42 config: dict,
43 global_config: Optional[dict] = None,
44 service: Optional[Service] = None,
45 ):
46 super().__init__(tunables, config, global_config, service)
48 opt_type = getattr(
49 OptimizerType, self._config.pop("optimizer_type", DEFAULT_OPTIMIZER_TYPE.name)
50 )
52 if opt_type == OptimizerType.SMAC:
53 output_directory = self._config.get("output_directory")
54 if output_directory is not None:
55 # If output_directory is specified, turn it into an absolute path.
56 self._config["output_directory"] = os.path.abspath(output_directory)
57 else:
58 _LOG.warning(
59 (
60 "SMAC optimizer output_directory was null. "
61 "SMAC will use a temporary directory."
62 )
63 )
65 # Make sure max_trials >= max_suggestions.
66 if "max_trials" not in self._config:
67 self._config["max_trials"] = self._max_suggestions
68 assert int(self._config["max_trials"]) >= self._max_suggestions, (
69 f"""max_trials {self._config.get("max_trials")} """
70 f"<= max_suggestions{self._max_suggestions}"
71 )
73 if "run_name" not in self._config and self.experiment_id:
74 self._config["run_name"] = self.experiment_id
76 space_adapter_type = self._config.pop("space_adapter_type", None)
77 space_adapter_config = self._config.pop("space_adapter_config", {})
79 if space_adapter_type is not None:
80 space_adapter_type = getattr(SpaceAdapterType, space_adapter_type)
82 self._opt: BaseOptimizer = OptimizerFactory.create(
83 parameter_space=self.config_space,
84 optimization_targets=list(self._opt_targets),
85 optimizer_type=opt_type,
86 optimizer_kwargs=self._config,
87 space_adapter_type=space_adapter_type,
88 space_adapter_kwargs=space_adapter_config,
89 )
91 def __exit__(
92 self,
93 ex_type: Optional[Type[BaseException]],
94 ex_val: Optional[BaseException],
95 ex_tb: Optional[TracebackType],
96 ) -> Literal[False]:
97 self._opt.cleanup()
98 return super().__exit__(ex_type, ex_val, ex_tb)
100 @property
101 def name(self) -> str:
102 return f"{self.__class__.__name__}:{self._opt.__class__.__name__}"
104 def bulk_register(
105 self,
106 configs: Sequence[dict],
107 scores: Sequence[Optional[Dict[str, TunableValue]]],
108 status: Optional[Sequence[Status]] = None,
109 ) -> bool:
111 if not super().bulk_register(configs, scores, status):
112 return False
114 df_configs = self._to_df(configs) # Impute missing values, if necessary
116 df_scores = self._adjust_signs_df(
117 pd.DataFrame([{} if score is None else score for score in scores])
118 )
120 if status is not None:
121 # Select only the completed trials, set scores for failed trials to +inf.
122 df_status = pd.Series(status)
123 # TODO: Be more flexible with values used for failed trials (not just +inf).
124 # Issue: https://github.com/microsoft/MLOS/issues/523
125 df_scores[df_status != Status.SUCCEEDED] = float("inf")
126 df_status_completed = df_status.apply(Status.is_completed)
127 df_configs = df_configs[df_status_completed]
128 df_scores = df_scores[df_status_completed]
130 # TODO: Specify (in the config) which metrics to pass to the optimizer.
131 # Issue: https://github.com/microsoft/MLOS/issues/745
132 self._opt.register(observations=Observations(configs=df_configs, scores=df_scores))
134 if _LOG.isEnabledFor(logging.DEBUG):
135 (score, _) = self.get_best_observation()
136 _LOG.debug("Warm-up END: %s :: %s", self, score)
138 return True
140 def _adjust_signs_df(self, df_scores: pd.DataFrame) -> pd.DataFrame:
141 """Coerce optimization target scores to floats and adjust the signs for
142 MINIMIZATION problem.
143 """
144 df_targets = df_scores[list(self._opt_targets)]
145 try:
146 return df_targets.astype(float) * self._opt_targets.values()
147 except ValueError as ex:
148 _LOG.error(
149 "Some score values cannot be converted to float - check the data ::\n%s",
150 df_targets,
151 exc_info=True,
152 )
153 raise ValueError("Some score values cannot be converted to float") from ex
155 def _to_df(self, configs: Sequence[Dict[str, TunableValue]]) -> pd.DataFrame:
156 """
157 Select from past trials only the columns required in this experiment and impute
158 default values for the tunables that are missing in the dataframe.
160 Parameters
161 ----------
162 configs : Sequence[dict]
163 Sequence of dicts with past trials data.
165 Returns
166 -------
167 df_configs : pd.DataFrame
168 A dataframe with past trials data, with missing values imputed.
169 """
170 df_configs = pd.DataFrame(configs)
171 tunables_names = list(self._tunables.get_param_values().keys())
172 missing_cols = set(tunables_names).difference(df_configs.columns)
173 for tunable, _group in self._tunables:
174 if tunable.name in missing_cols:
175 df_configs[tunable.name] = tunable.default
176 else:
177 df_configs.fillna({tunable.name: tunable.default}, inplace=True)
178 # External data can have incorrect types (e.g., all strings).
179 df_configs[tunable.name] = df_configs[tunable.name].astype(tunable.dtype)
180 # Add columns for tunables with special values.
181 if tunable.special:
182 (special_name, type_name) = special_param_names(tunable.name)
183 tunables_names += [special_name, type_name]
184 is_special = df_configs[tunable.name].apply(tunable.special.__contains__)
185 df_configs[type_name] = TunableValueKind.RANGE
186 df_configs.loc[is_special, type_name] = TunableValueKind.SPECIAL
187 if tunable.type == "int":
188 # Make int column NULLABLE:
189 df_configs[tunable.name] = df_configs[tunable.name].astype("Int64")
190 df_configs[special_name] = df_configs[tunable.name]
191 df_configs.loc[~is_special, special_name] = None
192 df_configs.loc[is_special, tunable.name] = None
193 # By default, hyperparameters in ConfigurationSpace are sorted by name:
194 df_configs = df_configs[sorted(tunables_names)]
195 _LOG.debug("Loaded configs:\n%s", df_configs)
196 return df_configs
198 def suggest(self) -> TunableGroups:
199 tunables = super().suggest()
200 if self._start_with_defaults:
201 _LOG.info("Use default values for the first trial")
202 suggestion = self._opt.suggest(defaults=self._start_with_defaults)
203 self._start_with_defaults = False
204 _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, suggestion.config)
205 return tunables.assign(configspace_data_to_tunable_values(suggestion.config.to_dict()))
207 def register(
208 self,
209 tunables: TunableGroups,
210 status: Status,
211 score: Optional[Dict[str, TunableValue]] = None,
212 ) -> Optional[Dict[str, float]]:
213 registered_score = super().register(
214 tunables,
215 status,
216 score,
217 ) # Sign-adjusted for MINIMIZATION
218 if status.is_completed():
219 assert registered_score is not None
220 df_config = self._to_df([tunables.get_param_values()])
221 _LOG.debug("Score: %s Dataframe:\n%s", registered_score, df_config)
222 # TODO: Specify (in the config) which metrics to pass to the optimizer.
223 # Issue: https://github.com/microsoft/MLOS/issues/745
224 self._opt.register(
225 observations=Observations(
226 configs=df_config,
227 scores=pd.DataFrame([registered_score], dtype=float),
228 )
229 )
230 return registered_score
232 def get_best_observation(
233 self,
234 ) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]:
235 best_observations = self._opt.get_best_observations()
236 if len(best_observations) == 0:
237 return (None, None)
238 params = configspace_data_to_tunable_values(best_observations.configs.iloc[0].to_dict())
239 scores = self._adjust_signs_df(best_observations.scores).iloc[0].to_dict()
240 _LOG.debug("Best observation: %s score: %s", params, scores)
241 return (scores, self._tunables.copy().assign(params))