Coverage for mlos_bench/mlos_bench/services/config_persistence.py: 95%
167 statements
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-14 01:58 +0000
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-14 01:58 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""Helper functions to load, instantiate, and serialize Python objects that encapsulate
6a benchmark :py:class:`.Environment`, :py:mod:`~mlos_bench.tunables`,
7:py:class:`.Service` functions, etc from JSON configuration files and strings.
8"""
10import logging
11import os
12import sys
13from typing import (
14 TYPE_CHECKING,
15 Any,
16 Callable,
17 Dict,
18 Iterable,
19 List,
20 Optional,
21 Tuple,
22 Union,
23)
25import json5 # To read configs with comments and other JSON5 syntax features
26from jsonschema import SchemaError, ValidationError
28from mlos_bench.config.schemas.config_schemas import ConfigSchema
29from mlos_bench.environments.base_environment import Environment
30from mlos_bench.optimizers.base_optimizer import Optimizer
31from mlos_bench.services.base_service import Service
32from mlos_bench.services.types.config_loader_type import SupportsConfigLoading
33from mlos_bench.tunables.tunable import TunableValue
34from mlos_bench.tunables.tunable_groups import TunableGroups
35from mlos_bench.util import (
36 instantiate_from_config,
37 merge_parameters,
38 path_join,
39 preprocess_dynamic_configs,
40)
42if sys.version_info < (3, 10):
43 from importlib_resources import files
44else:
45 from importlib.resources import files
47if TYPE_CHECKING:
48 from mlos_bench.schedulers.base_scheduler import Scheduler
49 from mlos_bench.storage.base_storage import Storage
52_LOG = logging.getLogger(__name__)
class ConfigPersistenceService(Service, SupportsConfigLoading):
    """Collection of methods to deserialize the Environment, Service, and TunableGroups
    objects.
    """

    BUILTIN_CONFIG_PATH = str(files("mlos_bench.config").joinpath("")).replace("\\", "/")
    """A calculated path to the built-in configuration files shipped with the mlos_bench
    package.
    """

    def __init__(
        self,
        config: Optional[Dict[str, Any]] = None,
        global_config: Optional[Dict[str, Any]] = None,
        parent: Optional[Service] = None,
        methods: Union[Dict[str, Callable], List[Callable], None] = None,
    ):
        """
        Create a new instance of config persistence service.

        Parameters
        ----------
        config : dict
            Free-format dictionary that contains parameters for the service.
            (E.g., root path for config files, etc.)
        global_config : dict
            Free-format dictionary of global parameters.
        parent : Service
            An optional parent service that can provide mixin functions.
        methods : Union[Dict[str, Callable], List[Callable], None]
            New methods to register with the service.
        """
        super().__init__(
            config,
            global_config,
            parent,
            self.merge_methods(
                methods,
                [
                    self.resolve_path,
                    self.load_config,
                    self.prepare_class_load,
                    self.build_service,
                    self.build_environment,
                    self.load_services,
                    self.load_environment,
                    self.load_environment_list,
                ],
            ),
        )
        self._config_loader_service = self

        # Normalize and deduplicate config paths, but maintain order.
        # NOTE: normalize *before* the membership test - the list only ever
        # holds normalized paths, so comparing the raw path against it would
        # let differently spelled duplicates of the same directory through.
        self._config_path: List[str] = []
        for path in self.config.get("config_path", []):
            norm_path = path_join(path, abs_path=True)
            if norm_path not in self._config_path:
                self._config_path.append(norm_path)
        # Prepend the cwd if not already on the list.
        cwd = path_join(os.getcwd(), abs_path=True)
        if cwd not in self._config_path:
            self._config_path.insert(0, cwd)
        # Append the built-in config path if not already on the list.
        if self.BUILTIN_CONFIG_PATH not in self._config_path:
            self._config_path.append(self.BUILTIN_CONFIG_PATH)

    @property
    def config_paths(self) -> List[str]:
        """
        Gets the list of config paths this service will search for config files.

        Returns
        -------
        List[str]
            A copy of the internal search path list.
        """
        return list(self._config_path)  # make a copy to avoid modifications

    def resolve_path(self, file_path: str, extra_paths: Optional[Iterable[str]] = None) -> str:
        """
        Resolves and prepends the suitable :py:attr:`.config_paths` to ``file_path`` if
        the latter is not absolute.

        If ``file_path`` is absolute, or if it does not exist under any of the
        search paths, ``file_path`` is returned as is.

        Parameters
        ----------
        file_path : str
            Path to the input config file.
        extra_paths : Iterable[str]
            Additional directories to prepend to the list of
            :py:attr:`.config_paths` search paths.

        Returns
        -------
        path : str
            An actual path to the config or script.
        """
        # Extra paths take precedence over the configured search paths.
        path_list = list(extra_paths or []) + self._config_path
        _LOG.debug("Resolve path: %s in: %s", file_path, path_list)
        if os.path.isabs(file_path):
            _LOG.debug("Path is absolute: %s", file_path)
            return file_path
        # First existing match wins.
        for path in path_list:
            full_path = path_join(path, file_path, abs_path=True)
            if os.path.exists(full_path):
                _LOG.debug("Path resolved: %s", full_path)
                return full_path
        _LOG.debug("Path not resolved: %s", file_path)
        return file_path

    def load_config(
        self,
        json: str,
        schema_type: Optional[ConfigSchema],
    ) -> Dict[str, Any]:
        """
        Load JSON config file or JSON string. Search for a file relative to
        :py:attr:`.config_paths` if the input path is not absolute. This method is
        exported to be used as a :py:class:`.SupportsConfigLoading` type
        :py:class:`.Service`.

        Parameters
        ----------
        json : str
            Path to the input config file or a JSON string.
        schema_type : Optional[ConfigSchema]
            The schema type to validate the config against.
            If None, the config is not validated and a warning is logged.

        Returns
        -------
        config : Union[dict, List[dict]]
            Free-format dictionary that contains the configuration.

        Raises
        ------
        ValueError
            If the JSON string cannot be parsed or if the config fails
            the schema validation.
        """
        assert isinstance(json, str)
        if any(c in json for c in ("{", "[")):
            # If the path contains braces, it is likely already a json string,
            # so just parse it.
            _LOG.info("Load config from json string: %s", json)
            try:
                config: Any = json5.loads(json)
            except ValueError as ex:
                _LOG.error("Failed to parse config from JSON string: %s", json)
                raise ValueError(f"Failed to parse config from JSON string: {json}") from ex
        else:
            json = self.resolve_path(json)
            _LOG.info("Load config file: %s", json)
            with open(json, mode="r", encoding="utf-8") as fh_json:
                config = json5.load(fh_json)
        if schema_type is not None:
            try:
                schema_type.validate(config)
            except (ValidationError, SchemaError) as ex:
                _LOG.error(
                    "Failed to validate config %s against schema type %s at %s",
                    json,
                    schema_type.name,
                    schema_type.value,
                )
                raise ValueError(
                    f"Failed to validate config {json} against "
                    f"schema type {schema_type.name} at {schema_type.value}"
                ) from ex
            if isinstance(config, dict) and config.get("$schema"):
                # Remove $schema attributes from the config after we've validated
                # them to avoid passing them on to other objects
                # (e.g. SqlAlchemy based storage initializers).
                # NOTE: we only do this for internal schemas.
                # Other configs that get loaded may need the schema field
                # (e.g. Azure ARM templates).
                del config["$schema"]
        else:
            _LOG.warning("Config %s is not validated against a schema.", json)
        return config  # type: ignore[no-any-return]

    def prepare_class_load(
        self,
        config: Dict[str, Any],
        global_config: Optional[Dict[str, Any]] = None,
        parent_args: Optional[Dict[str, TunableValue]] = None,
    ) -> Tuple[str, Dict[str, Any]]:
        """
        Extract the class instantiation parameters from the configuration. Mix-in the
        global parameters and resolve the local file system paths, where it is required.

        Note that ``config`` is updated in place: a missing ``"config"`` section
        is created, and the global parameters are merged into it.

        Parameters
        ----------
        config : dict
            Configuration of the optimizer.
        global_config : dict
            Global configuration parameters (optional).
        parent_args : Dict[str, TunableValue]
            An optional reference of the parent CompositeEnv's const_args used to
            expand dynamic config parameters from.

        Returns
        -------
        (class_name, class_config) : (str, dict)
            Name of the class to instantiate and its configuration.

        Raises
        ------
        ValueError
            If a property listed in ``resolve_config_property_paths`` is neither
            a string nor a list/tuple.
        """
        class_name = config["class"]
        class_config = config.setdefault("config", {})

        # Replace any appearance of "$param_name" in the const_arg values with
        # the value from the parent CompositeEnv.
        # Note: we could consider expanding this feature to additional config
        # sections in the future, but for now only use it in const_args.
        if class_name.startswith("mlos_bench.environments."):
            const_args = class_config.get("const_args", {})
            preprocess_dynamic_configs(dest=const_args, source=parent_args)

        merge_parameters(dest=class_config, source=global_config)

        # Only resolve the properties that are both present in the class config
        # and explicitly marked as file system paths.
        for key in set(class_config).intersection(config.get("resolve_config_property_paths", [])):
            if isinstance(class_config[key], str):
                class_config[key] = self.resolve_path(class_config[key])
            elif isinstance(class_config[key], (list, tuple)):
                class_config[key] = [self.resolve_path(path) for path in class_config[key]]
            else:
                raise ValueError(f"Parameter {key} must be a string or a list")

        # Guard the debug message to skip the serialization when it is not needed.
        if _LOG.isEnabledFor(logging.DEBUG):
            _LOG.debug(
                "Instantiating: %s with config:\n%s",
                class_name,
                json5.dumps(class_config, indent=2),
            )

        return (class_name, class_config)

    def build_optimizer(
        self,
        *,
        tunables: TunableGroups,
        service: Service,
        config: Dict[str, Any],
        global_config: Optional[Dict[str, Any]] = None,
    ) -> Optimizer:
        """
        Instantiation of :py:mod:`mlos_bench` :py:class:`.Optimizer` that depend on
        :py:class:`.Service` and :py:class:`.TunableGroups`.

        Parameters
        ----------
        tunables : TunableGroups
            Tunable parameters of the environment. We need them to validate the
            configurations of merged-in experiments and restored/pending trials.
        service: Service
            An optional service object (e.g., providing methods to load config files, etc.)
        config : dict
            Configuration of the class to instantiate, as loaded from JSON.
        global_config : dict
            Global configuration parameters (optional).

        Returns
        -------
        inst : Optimizer
            A new instance of the `Optimizer` class.
        """
        # Optionally extend the tunables with the ones listed in the config.
        tunables_path = config.get("include_tunables")
        if tunables_path is not None:
            tunables = self.load_tunables(tunables_path, tunables)
        (class_name, class_config) = self.prepare_class_load(config, global_config)
        inst = instantiate_from_config(
            Optimizer,  # type: ignore[type-abstract]
            class_name,
            tunables=tunables,
            config=class_config,
            global_config=global_config,
            service=service,
        )
        _LOG.info("Created: Optimizer %s", inst)
        return inst

    def build_storage(
        self,
        *,
        service: Service,
        config: Dict[str, Any],
        global_config: Optional[Dict[str, Any]] = None,
    ) -> "Storage":
        """
        Instantiation of mlos_bench :py:class:`.Storage` objects.

        Parameters
        ----------
        service: Service
            An optional service object (e.g., providing methods to load config files, etc.)
        config : dict
            Configuration of the class to instantiate, as loaded from JSON.
        global_config : dict
            Global configuration parameters (optional).

        Returns
        -------
        inst : Storage
            A new instance of the Storage class.
        """
        (class_name, class_config) = self.prepare_class_load(config, global_config)
        # Storage is imported at module level under TYPE_CHECKING only,
        # so the runtime import happens here.
        # pylint: disable=import-outside-toplevel
        from mlos_bench.storage.base_storage import Storage

        inst = instantiate_from_config(
            Storage,  # type: ignore[type-abstract]
            class_name,
            config=class_config,
            global_config=global_config,
            service=service,
        )
        _LOG.info("Created: Storage %s", inst)
        return inst

    def build_scheduler(  # pylint: disable=too-many-arguments
        self,
        *,
        config: Dict[str, Any],
        global_config: Dict[str, Any],
        environment: Environment,
        optimizer: Optimizer,
        storage: "Storage",
        root_env_config: str,
    ) -> "Scheduler":
        """
        Instantiation of mlos_bench :py:class:`.Scheduler`.

        Parameters
        ----------
        config : dict
            Configuration of the class to instantiate, as loaded from JSON.
        global_config : dict
            Global configuration parameters.
        environment : Environment
            The environment to benchmark/optimize.
        optimizer : Optimizer
            The optimizer to use.
        storage : Storage
            The storage to use.
        root_env_config : str
            Path to the root environment configuration.

        Returns
        -------
        inst : Scheduler
            A new instance of the Scheduler.
        """
        (class_name, class_config) = self.prepare_class_load(config, global_config)
        # Scheduler is imported at module level under TYPE_CHECKING only,
        # so the runtime import happens here.
        # pylint: disable=import-outside-toplevel
        from mlos_bench.schedulers.base_scheduler import Scheduler

        inst = instantiate_from_config(
            Scheduler,  # type: ignore[type-abstract]
            class_name,
            config=class_config,
            global_config=global_config,
            environment=environment,
            optimizer=optimizer,
            storage=storage,
            root_env_config=root_env_config,
        )
        _LOG.info("Created: Scheduler %s", inst)
        return inst

    def build_environment(
        self,
        config: Dict[str, Any],
        tunables: TunableGroups,
        global_config: Optional[Dict[str, Any]] = None,
        parent_args: Optional[Dict[str, TunableValue]] = None,
        service: Optional[Service] = None,
    ) -> Environment:
        # pylint: disable=too-many-arguments,too-many-positional-arguments
        """
        Factory method for a new :py:class:`.Environment` with a given config.

        Parameters
        ----------
        config : dict
            A dictionary with three mandatory fields:
                "name": Human-readable string describing the environment;
                "class": FQN of a Python class to instantiate;
                "config": Free-format dictionary to pass to the constructor.
        tunables : TunableGroups
            A (possibly empty) collection of groups of tunable parameters for
            all environments.
        global_config : dict
            Global parameters to add to the environment config.
        parent_args : Dict[str, TunableValue]
            An optional reference of the parent CompositeEnv's const_args used to
            expand dynamic config parameters from.
        service: Service
            An optional service object (e.g., providing methods to
            deploy or reboot a VM, etc.).

        Returns
        -------
        env : Environment
            An instance of the ``Environment`` class initialized with ``config``.
        """
        env_name = config["name"]
        (env_class, env_config) = self.prepare_class_load(config, global_config, parent_args)

        # Optionally mix in the services listed in the environment config.
        env_services_path = config.get("include_services")
        if env_services_path is not None:
            service = self.load_services(env_services_path, global_config, service)

        # Optionally extend the tunables with the ones listed in the environment config.
        env_tunables_path = config.get("include_tunables")
        if env_tunables_path is not None:
            tunables = self.load_tunables(env_tunables_path, tunables)

        _LOG.debug("Creating env: %s :: %s", env_name, env_class)
        env = Environment.new(
            env_name=env_name,
            class_name=env_class,
            config=env_config,
            global_config=global_config,
            tunables=tunables,
            service=service,
        )

        _LOG.info("Created env: %s :: %s", env_name, env)
        return env

    def _build_standalone_service(
        self,
        config: Dict[str, Any],
        global_config: Optional[Dict[str, Any]] = None,
        parent: Optional[Service] = None,
    ) -> Service:
        """
        Factory method for a new service with a given config.

        Parameters
        ----------
        config : dict
            A dictionary with two mandatory fields:
                "class": FQN of a Python class to instantiate;
                "config": Free-format dictionary to pass to the constructor.
        global_config : dict
            Global parameters to add to the service config.
        parent: Service
            An optional reference of the parent service to mix in.

        Returns
        -------
        svc : Service
            An instance of the `Service` class initialized with `config`.
        """
        (svc_class, svc_config) = self.prepare_class_load(config, global_config)
        service = Service.new(svc_class, svc_config, global_config, parent)
        _LOG.info("Created service: %s", service)
        return service

    def _build_composite_service(
        self,
        config_list: Iterable[Dict[str, Any]],
        global_config: Optional[Dict[str, Any]] = None,
        parent: Optional[Service] = None,
    ) -> Service:
        """
        Factory method for a new service that combines several service configs.

        Parameters
        ----------
        config_list : a list of dict
            A list where each element is a dictionary with 2 mandatory fields:
                "class": FQN of a Python class to instantiate;
                "config": Free-format dictionary to pass to the constructor.
        global_config : dict
            Global parameters to add to the service config.
        parent: Service
            An optional reference of the parent service to mix in.

        Returns
        -------
        svc : Service
            An instance of the `Service` class that is a combination of all
            services from the list plus the parent mix-in.
        """
        service = Service()
        if parent:
            service.register(parent.export())

        # Each new service gets the composite built so far as its parent,
        # and its exported methods are registered on that same composite.
        for config in config_list:
            service.register(
                self._build_standalone_service(config, global_config, service).export()
            )

        if _LOG.isEnabledFor(logging.DEBUG):
            _LOG.debug("Created mix-in service: %s", service)

        return service

    def build_service(
        self,
        config: Dict[str, Any],
        global_config: Optional[Dict[str, Any]] = None,
        parent: Optional[Service] = None,
    ) -> Service:
        """
        Factory method for a new service with a given config.

        Parameters
        ----------
        config : dict
            Either a single service config, i.e., a dictionary with 2 mandatory fields:
                "class": FQN of a Python class to instantiate;
                "config": Free-format dictionary to pass to the constructor.
            or a dictionary with a "services" field that contains a list of such configs.
        global_config : dict
            Global parameters to add to the service config.
        parent: Service
            An optional reference of the parent service to mix in.

        Returns
        -------
        svc : Service
            An instance of the `Service` class that is a combination of all
            services from the list plus the parent mix-in.
        """
        if _LOG.isEnabledFor(logging.DEBUG):
            _LOG.debug("Build service from config:\n%s", json5.dumps(config, indent=2))

        assert isinstance(config, dict)
        config_list: List[Dict[str, Any]]
        if "class" not in config:
            # Top level config is a simple object with a list of services
            config_list = config["services"]
        else:
            # Top level config is a single service
            if parent is None:
                return self._build_standalone_service(config, global_config)
            config_list = [config]

        return self._build_composite_service(config_list, global_config, parent)

    def load_environment(
        self,
        json: str,
        tunables: TunableGroups,
        global_config: Optional[Dict[str, Any]] = None,
        parent_args: Optional[Dict[str, TunableValue]] = None,
        service: Optional[Service] = None,
    ) -> Environment:
        # pylint: disable=too-many-arguments,too-many-positional-arguments
        """
        Load and build new :py:class:`.Environment` from the config file or JSON string.

        Parameters
        ----------
        json : str
            The environment JSON configuration file or JSON string.
        tunables : TunableGroups
            A (possibly empty) collection of tunables to add to the environment.
        global_config : dict
            Global parameters to add to the environment config.
        parent_args : Dict[str, TunableValue]
            An optional reference of the parent CompositeEnv's const_args used to
            expand dynamic config parameters from.
        service : Service
            An optional reference of the parent service to mix in.

        Returns
        -------
        env : Environment
            A new benchmarking environment.
        """
        config = self.load_config(json, ConfigSchema.ENVIRONMENT)
        assert isinstance(config, dict)
        return self.build_environment(config, tunables, global_config, parent_args, service)

    def load_environment_list(
        self,
        json: str,
        tunables: TunableGroups,
        global_config: Optional[Dict[str, Any]] = None,
        parent_args: Optional[Dict[str, TunableValue]] = None,
        service: Optional[Service] = None,
    ) -> List[Environment]:
        # pylint: disable=too-many-arguments,too-many-positional-arguments
        """
        Load and build a list of Environments from the config file or JSON string.

        The input is loaded and validated as a single environment config, and the
        resulting environment is returned wrapped in a one-element list.

        Parameters
        ----------
        json : str
            The environment JSON configuration file or a JSON string.
        tunables : TunableGroups
            A (possibly empty) collection of tunables to add to the environment.
        global_config : dict
            Global parameters to add to the environment config.
        parent_args : Dict[str, TunableValue]
            An optional reference of the parent CompositeEnv's const_args used to
            expand dynamic config parameters from.
        service : Service
            An optional reference of the parent service to mix in.

        Returns
        -------
        env : List[Environment]
            A list of new benchmarking environments.
        """
        config = self.load_config(json, ConfigSchema.ENVIRONMENT)
        return [self.build_environment(config, tunables, global_config, parent_args, service)]

    def load_services(
        self,
        jsons: Iterable[str],
        global_config: Optional[Dict[str, Any]] = None,
        parent: Optional[Service] = None,
    ) -> Service:
        """
        Read the configuration files or JSON strings and bundle all Service methods from
        those configs into a single Service object.

        Notes
        -----
        Order of the services in the list matters. If multiple Services export the
        same method, the last one in the list will be used.

        Parameters
        ----------
        jsons : list of str
            A list of service JSON configuration files or JSON strings.
        global_config : dict
            Global parameters to add to the service config.
        parent : Service
            An optional reference of the parent service to mix in.

        Returns
        -------
        service : Service
            A collection of service methods.
        """
        _LOG.info("Load services: %s parent: %s", jsons, parent.__class__.__name__)
        service = Service({}, global_config, parent)
        # Every service is built with the bundle assembled so far as its parent.
        for json in jsons:
            config = self.load_config(json, ConfigSchema.SERVICE)
            service.register(self.build_service(config, global_config, service).export())
        return service

    def load_tunables(
        self,
        jsons: Iterable[str],
        parent: Optional[TunableGroups] = None,
    ) -> TunableGroups:
        """
        Load a collection of tunable parameters from JSON files or strings into the
        parent TunableGroup.

        This helps allow standalone environment configs to reference
        overlapping tunable groups configs but still allow combining them into
        a single instance that each environment can reference.

        Parameters
        ----------
        jsons : list of str
            A list of JSON files or JSON strings to load.
        parent : TunableGroups
            A (possibly empty) collection of tunables to add to the new collection.

        Returns
        -------
        tunables : TunableGroups
            The larger collection of tunable parameters.
        """
        _LOG.info("Load tunables: '%s'", jsons)
        if parent is None:
            parent = TunableGroups()
        # Merge into a copy of the parent rather than into the parent itself.
        tunables = parent.copy()
        for json in jsons:
            config = self.load_config(json, ConfigSchema.TUNABLE_PARAMS)
            assert isinstance(config, dict)
            tunables.merge(TunableGroups(config))
        return tunables