Coverage for mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py: 98%


#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""A wrapper for mlos_core optimizers for mlos_bench."""

import logging
import os
from types import TracebackType
from typing import Dict, Optional, Sequence, Tuple, Type, Union

import pandas as pd
from typing_extensions import Literal

from mlos_bench.environments.status import Status
from mlos_bench.optimizers.base_optimizer import Optimizer
from mlos_bench.optimizers.convert_configspace import (
    TunableValueKind,
    configspace_data_to_tunable_values,
    special_param_names,
)
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable import TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_core.optimizers import (
    DEFAULT_OPTIMIZER_TYPE,
    BaseOptimizer,
    OptimizerFactory,
    OptimizerType,
    SpaceAdapterType,
)

_LOG = logging.getLogger(__name__)


class MlosCoreOptimizer(Optimizer):
    """A wrapper class for the mlos_core optimizers."""


    def __init__(
        self,
        tunables: TunableGroups,
        config: dict,
        global_config: Optional[dict] = None,
        service: Optional[Service] = None,
    ):
        super().__init__(tunables, config, global_config, service)

        opt_type = getattr(
            OptimizerType, self._config.pop("optimizer_type", DEFAULT_OPTIMIZER_TYPE.name)
        )

        if opt_type == OptimizerType.SMAC:
            output_directory = self._config.get("output_directory")
            if output_directory is not None:
                # If output_directory is specified, turn it into an absolute path.
                self._config["output_directory"] = os.path.abspath(output_directory)
            else:
                _LOG.warning(
                    "SMAC optimizer output_directory was null. "
                    "SMAC will use a temporary directory."
                )

        # Make sure max_trials >= max_suggestions.
        if "max_trials" not in self._config:
            self._config["max_trials"] = self._max_suggestions

        assert int(self._config["max_trials"]) >= self._max_suggestions, (
            f"max_trials {self._config.get('max_trials')} "
            f"< max_suggestions {self._max_suggestions}"
        )


        if "run_name" not in self._config and self.experiment_id:
            self._config["run_name"] = self.experiment_id

        space_adapter_type = self._config.pop("space_adapter_type", None)
        space_adapter_config = self._config.pop("space_adapter_config", {})

        if space_adapter_type is not None:
            space_adapter_type = getattr(SpaceAdapterType, space_adapter_type)
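            # Illustrative mapping, assuming mlos_core's SpaceAdapterType enum
            # members: a config value of "LLAMATUNE" resolves to
            # SpaceAdapterType.LLAMATUNE here.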


        self._opt: BaseOptimizer = OptimizerFactory.create(
            parameter_space=self.config_space,
            optimization_targets=list(self._opt_targets),
            optimizer_type=opt_type,
            optimizer_kwargs=self._config,
            space_adapter_type=space_adapter_type,
            space_adapter_kwargs=space_adapter_config,
        )


    def __exit__(
        self,
        ex_type: Optional[Type[BaseException]],
        ex_val: Optional[BaseException],
        ex_tb: Optional[TracebackType],
    ) -> Literal[False]:
        self._opt.cleanup()
        return super().__exit__(ex_type, ex_val, ex_tb)

    @property
    def name(self) -> str:
        return f"{self.__class__.__name__}:{self._opt.__class__.__name__}"


    def bulk_register(
        self,
        configs: Sequence[dict],
        scores: Sequence[Optional[Dict[str, TunableValue]]],
        status: Optional[Sequence[Status]] = None,
    ) -> bool:

        if not super().bulk_register(configs, scores, status):
            return False

        df_configs = self._to_df(configs)  # Impute missing values, if necessary

        df_scores = self._adjust_signs_df(
            pd.DataFrame([{} if score is None else score for score in scores])
        )

        if status is not None:
            # Select only the completed trials, set scores for failed trials to +inf.
            df_status = pd.Series(status)
            # TODO: Be more flexible with values used for failed trials (not just +inf).
            # Issue: https://github.com/microsoft/MLOS/issues/523
            df_scores[df_status != Status.SUCCEEDED] = float("inf")
            df_status_completed = df_status.apply(Status.is_completed)
            df_configs = df_configs[df_status_completed]
            df_scores = df_scores[df_status_completed]
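            # Net effect (illustrative, assuming the usual Status semantics):
            # a FAILED or TIMED_OUT trial counts as completed and is registered
            # with a score of +inf, while a PENDING or RUNNING trial is dropped
            # from both frames entirely.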


        # TODO: Specify (in the config) which metrics to pass to the optimizer.
        # Issue: https://github.com/microsoft/MLOS/issues/745
        self._opt.register(configs=df_configs, scores=df_scores)

        if _LOG.isEnabledFor(logging.DEBUG):
            (score, _) = self.get_best_observation()
            _LOG.debug("Warm-up END: %s :: %s", self, score)

        return True

    def _adjust_signs_df(self, df_scores: pd.DataFrame) -> pd.DataFrame:
        """Coerce optimization target scores to floats and adjust the signs for the
        MINIMIZATION problem.
        """
        df_targets = df_scores[list(self._opt_targets)]
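        # Illustrative example, assuming self._opt_targets == {"throughput": -1}
        # (i.e., maximize throughput): a score of 120.0 is flipped to -120.0 so
        # that the underlying optimizer can always minimize.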

        try:
            return df_targets.astype(float) * self._opt_targets.values()
        except ValueError as ex:
            _LOG.error(
                "Some score values cannot be converted to float - check the data ::\n%s",
                df_targets,
                exc_info=True,
            )
            raise ValueError("Some score values cannot be converted to float") from ex

    def _to_df(self, configs: Sequence[Dict[str, TunableValue]]) -> pd.DataFrame:
        """
        Select from past trials only the columns required in this experiment and impute
        default values for the tunables that are missing in the dataframe.

        Parameters
        ----------
        configs : Sequence[dict]
            Sequence of dicts with past trials data.

        Returns
        -------
        df_configs : pd.DataFrame
            A dataframe with past trials data, with missing values imputed.
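
        Examples
        --------
        Illustrative only (hypothetical tunables, not from this module): given
        tunables ``vm_size`` and ``idle`` where ``idle`` has default ``"halt"``,
        an input of ``[{"vm_size": "Standard_B4ms"}]`` yields a one-row frame
        with ``idle`` imputed to ``"halt"``.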

169 """ 

170 df_configs = pd.DataFrame(configs) 

171 tunables_names = list(self._tunables.get_param_values().keys()) 

172 missing_cols = set(tunables_names).difference(df_configs.columns) 

173 for tunable, _group in self._tunables: 

174 if tunable.name in missing_cols: 

175 df_configs[tunable.name] = tunable.default 

176 else: 

177 df_configs.fillna({tunable.name: tunable.default}, inplace=True) 

178 # External data can have incorrect types (e.g., all strings). 

179 df_configs[tunable.name] = df_configs[tunable.name].astype(tunable.dtype) 

180 # Add columns for tunables with special values. 

181 if tunable.special: 

182 (special_name, type_name) = special_param_names(tunable.name) 

183 tunables_names += [special_name, type_name] 

184 is_special = df_configs[tunable.name].apply(tunable.special.__contains__) 

185 df_configs[type_name] = TunableValueKind.RANGE 

186 df_configs.loc[is_special, type_name] = TunableValueKind.SPECIAL 

187 if tunable.type == "int": 

188 # Make int column NULLABLE: 

189 df_configs[tunable.name] = df_configs[tunable.name].astype("Int64") 

190 df_configs[special_name] = df_configs[tunable.name] 

191 df_configs.loc[~is_special, special_name] = None 

192 df_configs.loc[is_special, tunable.name] = None 
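                # Resulting layout, schematically (column names come from
                # special_param_names(); values here are hypothetical): a row
                # with a regular value keeps <name> set and <special_name>
                # null, with <type_name> == RANGE; a row with a special value
                # moves it to <special_name> and nulls out <name>, with
                # <type_name> == SPECIAL.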

        # By default, hyperparameters in ConfigurationSpace are sorted by name:
        df_configs = df_configs[sorted(tunables_names)]
        _LOG.debug("Loaded configs:\n%s", df_configs)
        return df_configs

    def suggest(self) -> TunableGroups:
        tunables = super().suggest()
        if self._start_with_defaults:
            _LOG.info("Use default values for the first trial")
        df_config, _metadata = self._opt.suggest(defaults=self._start_with_defaults)
        self._start_with_defaults = False
        _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, df_config)
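        # df_config is a one-row dataframe in ConfigSpace terms; take that row,
        # convert it back to tunable values (dropping the helper columns for
        # special values), and overlay it onto the current tunables.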

        return tunables.assign(configspace_data_to_tunable_values(df_config.loc[0].to_dict()))

    def register(
        self,
        tunables: TunableGroups,
        status: Status,
        score: Optional[Dict[str, TunableValue]] = None,
    ) -> Optional[Dict[str, float]]:
        registered_score = super().register(
            tunables,
            status,
            score,
        )  # Sign-adjusted for MINIMIZATION
        if status.is_completed():
            assert registered_score is not None
            df_config = self._to_df([tunables.get_param_values()])
            _LOG.debug("Score: %s Dataframe:\n%s", registered_score, df_config)
            # TODO: Specify (in the config) which metrics to pass to the optimizer.
            # Issue: https://github.com/microsoft/MLOS/issues/745
            self._opt.register(
                configs=df_config,
                scores=pd.DataFrame([registered_score], dtype=float),
            )
        return registered_score


    def get_best_observation(
        self,
    ) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]:
        (df_config, df_score, _df_context) = self._opt.get_best_observations()
        if len(df_config) == 0:
            return (None, None)
        params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict())
        scores = self._adjust_signs_df(df_score).iloc[0].to_dict()
        _LOG.debug("Best observation: %s score: %s", params, scores)
        return (scores, self._tunables.copy().assign(params))