Coverage for mlos_bench/mlos_bench/services/config_persistence.py: 95%
169 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-21 01:50 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-21 01:50 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""Helper functions to load, instantiate, and serialize Python objects that encapsulate
6a benchmark :py:class:`.Environment`, :py:mod:`~mlos_bench.tunables`,
7:py:class:`.Service` functions, etc., from JSON configuration files and strings.
8"""
10import logging
11import os
12from collections.abc import Callable, Iterable
13from importlib.resources import files
14from typing import TYPE_CHECKING, Any
16import json5 # To read configs with comments and other JSON5 syntax features
17from jsonschema import SchemaError, ValidationError
19from mlos_bench.config.schemas.config_schemas import ConfigSchema
20from mlos_bench.environments.base_environment import Environment
21from mlos_bench.optimizers.base_optimizer import Optimizer
22from mlos_bench.services.base_service import Service
23from mlos_bench.services.types.config_loader_type import SupportsConfigLoading
24from mlos_bench.tunables.tunable import TunableValue
25from mlos_bench.tunables.tunable_groups import TunableGroups
26from mlos_bench.util import (
27 instantiate_from_config,
28 merge_parameters,
29 path_join,
30 preprocess_dynamic_configs,
31)
33if TYPE_CHECKING:
34 from mlos_bench.schedulers.base_scheduler import Scheduler
35 from mlos_bench.schedulers.trial_runner import TrialRunner
36 from mlos_bench.storage.base_storage import Storage
39_LOG = logging.getLogger(__name__)
class ConfigPersistenceService(Service, SupportsConfigLoading):
    """Collection of methods to deserialize the Environment, Service, and TunableGroups
    objects.

    The service keeps an ordered list of directories (``config_paths``) that are
    searched when a relative config file path needs to be resolved.
    """

    BUILTIN_CONFIG_PATH = str(files("mlos_bench.config").joinpath("")).replace("\\", "/")
    """A calculated path to the built-in configuration files shipped with the mlos_bench
    package.
    """

    def __init__(
        self,
        config: dict[str, Any] | None = None,
        global_config: dict[str, Any] | None = None,
        parent: Service | None = None,
        methods: dict[str, Callable] | list[Callable] | None = None,
    ):
        """
        Create a new instance of config persistence service.

        Parameters
        ----------
        config : dict
            Free-format dictionary that contains parameters for the service.
            (E.g., root path for config files, etc.)
            The optional ``"config_path"`` entry is a list of directories to search.
        global_config : dict
            Free-format dictionary of global parameters.
        parent : Service
            An optional parent service that can provide mixin functions.
        methods : dict[str, Callable] | list[Callable] | None
            New methods to register with the service.
        """
        super().__init__(
            config,
            global_config,
            parent,
            self.merge_methods(
                methods,
                [
                    self.get_config_paths,
                    self.resolve_path,
                    self.load_config,
                    self.prepare_class_load,
                    self.build_service,
                    self.build_environment,
                    self.load_services,
                    self.load_environment,
                    self.load_environment_list,
                ],
            ),
        )
        self._config_loader_service = self

        # Normalize and deduplicate config paths, but maintain order.
        # NOTE: the path must be normalized *before* the membership test,
        # otherwise the same directory spelled differently (relative vs.
        # absolute, trailing separators, etc.) would be added more than once.
        self._config_path: list[str] = []
        for path in self.config.get("config_path", []):
            normalized_path = path_join(path, abs_path=True)
            if normalized_path not in self._config_path:
                self._config_path.append(normalized_path)
        # Prepend the cwd if not already on the list.
        cwd = path_join(os.getcwd(), abs_path=True)
        if cwd not in self._config_path:
            self._config_path.insert(0, cwd)
        # Append the built-in config path if not already on the list.
        if self.BUILTIN_CONFIG_PATH not in self._config_path:
            self._config_path.append(self.BUILTIN_CONFIG_PATH)

    @property
    def config_paths(self) -> list[str]:
        """
        Gets the list of config paths this service will search for config files.

        Returns
        -------
        list[str]
            A copy of the internal search path list, in lookup order.
        """
        return list(self._config_path)  # make a copy to avoid modifications

    def get_config_paths(self) -> list[str]:
        """
        Gets the list of config paths this service will search for config files.

        This is the method form of :py:attr:`.config_paths` that gets registered
        with the service in the constructor.

        Returns
        -------
        list[str]
            A copy of the internal search path list, in lookup order.
        """
        return self.config_paths

    def resolve_path(self, file_path: str, extra_paths: Iterable[str] | None = None) -> str:
        """
        Resolves and prepends the suitable :py:attr:`.config_paths` to ``file_path`` if
        the latter is not absolute. If ``file_path`` is absolute, or it cannot be found
        under any of the search paths, return ``file_path`` as is.

        Parameters
        ----------
        file_path : str
            Path to the input config file.
        extra_paths : Iterable[str]
            Additional directories to prepend to the list of
            :py:attr:`.config_paths` search paths.

        Returns
        -------
        path : str
            An actual path to the config or script.
        """
        # Extra paths take precedence over the configured search paths.
        path_list = list(extra_paths or []) + self._config_path
        _LOG.debug("Resolve path: %s in: %s", file_path, path_list)
        if os.path.isabs(file_path):
            _LOG.debug("Path is absolute: %s", file_path)
            return file_path
        for path in path_list:
            full_path = path_join(path, file_path, abs_path=True)
            if os.path.exists(full_path):
                _LOG.debug("Path resolved: %s", full_path)
                return full_path
        # Not found anywhere: hand the input back and let the caller fail on open.
        _LOG.debug("Path not resolved: %s", file_path)
        return file_path

    def load_config(
        self,
        json: str,
        schema_type: ConfigSchema | None,
    ) -> dict[str, Any]:
        """
        Load JSON config file or JSON string. Search for a file relative to
        :py:attr:`.config_paths` if the input path is not absolute. This method is
        exported to be used as a :py:class:`.SupportsConfigLoading` type
        :py:class:`.Service`.

        Parameters
        ----------
        json : str
            Path to the input config file or a JSON string.
        schema_type : ConfigSchema | None
            The schema type to validate the config against.
            If ``None``, no validation is done and a warning is logged.

        Returns
        -------
        config : dict | list[dict]
            Free-format dictionary that contains the configuration.

        Raises
        ------
        ValueError
            If the JSON string cannot be parsed or the config fails schema validation.
        """
        assert isinstance(json, str)
        if any(c in json for c in ("{", "[")):
            # If the path contains braces, it is likely already a json string,
            # so just parse it.
            _LOG.info("Load config from json string: %s", json)
            try:
                config: Any = json5.loads(json)
            except ValueError as ex:
                _LOG.error("Failed to parse config from JSON string: %s", json)
                raise ValueError(f"Failed to parse config from JSON string: {json}") from ex
        else:
            json = self.resolve_path(json)
            _LOG.info("Load config file: %s", json)
            with open(json, encoding="utf-8") as fh_json:
                config = json5.load(fh_json)
        if schema_type is not None:
            try:
                schema_type.validate(config)
            except (ValidationError, SchemaError) as ex:
                _LOG.error(
                    "Failed to validate config %s against schema type %s at %s",
                    json,
                    schema_type.name,
                    schema_type.value,
                )
                raise ValueError(
                    f"Failed to validate config {json} against "
                    f"schema type {schema_type.name} at {schema_type.value}"
                ) from ex
            if isinstance(config, dict) and config.get("$schema"):
                # Remove $schema attributes from the config after we've validated
                # them to avoid passing them on to other objects
                # (e.g. SqlAlchemy based storage initializers).
                # NOTE: we only do this for internal schemas.
                # Other configs that get loaded may need the schema field
                # (e.g. Azure ARM templates).
                del config["$schema"]
        else:
            _LOG.warning("Config %s is not validated against a schema.", json)
        return config  # type: ignore[no-any-return]

    def prepare_class_load(
        self,
        config: dict[str, Any],
        global_config: dict[str, Any] | None = None,
        parent_args: dict[str, TunableValue] | None = None,
    ) -> tuple[str, dict[str, Any]]:
        """
        Extract the class instantiation parameters from the configuration. Mix-in the
        global parameters and resolve the local file system paths, where it is required.

        Note that ``config`` is updated in place: a missing ``"config"`` section is
        created, and that section receives the merged and path-resolved values.

        Parameters
        ----------
        config : dict
            Configuration of the class to instantiate.
            Must have a ``"class"`` entry; ``"config"`` and
            ``"resolve_config_property_paths"`` are optional.
        global_config : dict
            Global configuration parameters (optional).
        parent_args : dict[str, TunableValue]
            An optional reference of the parent CompositeEnv's const_args used to
            expand dynamic config parameters from.

        Returns
        -------
        (class_name, class_config) : (str, dict)
            Name of the class to instantiate and its configuration.

        Raises
        ------
        ValueError
            If a property listed for path resolution is neither a string nor a list.
        """
        class_name = config["class"]
        class_config = config.setdefault("config", {})

        # Replace any appearance of "$param_name" in the const_arg values with
        # the value from the parent CompositeEnv.
        # Note: we could consider expanding this feature to additional config
        # sections in the future, but for now only use it in const_args.
        if class_name.startswith("mlos_bench.environments."):
            const_args = class_config.get("const_args", {})
            preprocess_dynamic_configs(dest=const_args, source=parent_args)

        merge_parameters(dest=class_config, source=global_config)

        # Only properties that are both present in the class config and explicitly
        # listed in "resolve_config_property_paths" get resolved to actual paths.
        for key in set(class_config).intersection(config.get("resolve_config_property_paths", [])):
            if isinstance(class_config[key], str):
                class_config[key] = self.resolve_path(class_config[key])
            elif isinstance(class_config[key], (list, tuple)):
                class_config[key] = [self.resolve_path(path) for path in class_config[key]]
            else:
                raise ValueError(f"Parameter {key} must be a string or a list")

        # Serializing the config is not free, so do it only when it gets logged.
        if _LOG.isEnabledFor(logging.DEBUG):
            _LOG.debug(
                "Instantiating: %s with config:\n%s",
                class_name,
                json5.dumps(class_config, indent=2),
            )

        return (class_name, class_config)

    def build_optimizer(
        self,
        *,
        tunables: TunableGroups,
        service: Service,
        config: dict[str, Any],
        global_config: dict[str, Any] | None = None,
    ) -> Optimizer:
        """
        Instantiation of :py:mod:`mlos_bench` :py:class:`.Optimizer` that depends on
        :py:class:`.Service` and :py:class:`.TunableGroups`.

        Parameters
        ----------
        tunables : TunableGroups
            Tunable parameters of the environment. We need them to validate the
            configurations of merged-in experiments and restored/pending trials.
        service: Service
            A service object (e.g., providing methods to load config files, etc.)
        config : dict
            Configuration of the class to instantiate, as loaded from JSON.
            An optional ``"include_tunables"`` entry lists extra tunables configs
            to merge into ``tunables`` before the optimizer is created.
        global_config : dict
            Global configuration parameters (optional).

        Returns
        -------
        inst : Optimizer
            A new instance of the `Optimizer` class.
        """
        tunables_path = config.get("include_tunables")
        if tunables_path is not None:
            tunables = self.load_tunables(tunables_path, tunables)
        (class_name, class_config) = self.prepare_class_load(config, global_config)
        inst = instantiate_from_config(
            Optimizer,  # type: ignore[type-abstract]
            class_name,
            tunables=tunables,
            config=class_config,
            global_config=global_config,
            service=service,
        )
        _LOG.info("Created: Optimizer %s", inst)
        return inst

    def build_storage(
        self,
        *,
        service: Service,
        config: dict[str, Any],
        global_config: dict[str, Any] | None = None,
    ) -> "Storage":
        """
        Instantiation of mlos_bench :py:class:`.Storage` objects.

        Parameters
        ----------
        service: Service
            A service object (e.g., providing methods to load config files, etc.)
        config : dict
            Configuration of the class to instantiate, as loaded from JSON.
        global_config : dict
            Global configuration parameters (optional).

        Returns
        -------
        inst : Storage
            A new instance of the Storage class.
        """
        (class_name, class_config) = self.prepare_class_load(config, global_config)
        # The Storage class is imported at module level for type checking only,
        # so the actual class has to be imported here.
        # pylint: disable=import-outside-toplevel
        from mlos_bench.storage.base_storage import Storage

        inst = instantiate_from_config(
            Storage,  # type: ignore[type-abstract]
            class_name,
            config=class_config,
            global_config=global_config,
            service=service,
        )
        _LOG.info("Created: Storage %s", inst)
        return inst

    def build_scheduler(  # pylint: disable=too-many-arguments
        self,
        *,
        config: dict[str, Any],
        global_config: dict[str, Any],
        trial_runners: list["TrialRunner"],
        optimizer: Optimizer,
        storage: "Storage",
        root_env_config: str,
    ) -> "Scheduler":
        """
        Instantiation of mlos_bench :py:class:`.Scheduler`.

        Parameters
        ----------
        config : dict
            Configuration of the class to instantiate, as loaded from JSON.
        global_config : dict
            Global configuration parameters.
        trial_runners : List[TrialRunner]
            The TrialRunners (Environments) to use.
        optimizer : Optimizer
            The optimizer to use.
        storage : Storage
            The storage to use.
        root_env_config : str
            Path to the root environment configuration.

        Returns
        -------
        inst : Scheduler
            A new instance of the Scheduler.
        """
        (class_name, class_config) = self.prepare_class_load(config, global_config)
        # The Scheduler class is imported at module level for type checking only,
        # so the actual class has to be imported here.
        # pylint: disable=import-outside-toplevel
        from mlos_bench.schedulers.base_scheduler import Scheduler

        inst = instantiate_from_config(
            Scheduler,  # type: ignore[type-abstract]
            class_name,
            config=class_config,
            global_config=global_config,
            trial_runners=trial_runners,
            optimizer=optimizer,
            storage=storage,
            root_env_config=root_env_config,
        )
        _LOG.info("Created: Scheduler %s", inst)
        return inst

    def build_environment(
        self,
        config: dict[str, Any],
        tunables: TunableGroups,
        global_config: dict[str, Any] | None = None,
        parent_args: dict[str, TunableValue] | None = None,
        service: Service | None = None,
    ) -> Environment:
        # pylint: disable=too-many-arguments,too-many-positional-arguments
        """
        Factory method for a new :py:class:`.Environment` with a given config.

        Parameters
        ----------
        config : dict
            A dictionary with three mandatory fields:
                "name": Human-readable string describing the environment;
                "class": FQN of a Python class to instantiate;
                "config": Free-format dictionary to pass to the constructor.
            The optional "include_services" and "include_tunables" fields list
            additional configs to load for this environment.
        tunables : TunableGroups
            A (possibly empty) collection of groups of tunable parameters for
            all environments.
        global_config : dict
            Global parameters to add to the environment config.
        parent_args : dict[str, TunableValue]
            An optional reference of the parent CompositeEnv's const_args used to
            expand dynamic config parameters from.
        service: Service
            An optional service object (e.g., providing methods to
            deploy or reboot a VM, etc.).

        Returns
        -------
        env : Environment
            An instance of the ``Environment`` class initialized with ``config``.
        """
        env_name = config["name"]
        (env_class, env_config) = self.prepare_class_load(config, global_config, parent_args)

        # Services included by the environment are layered on top of the given one.
        env_services_path = config.get("include_services")
        if env_services_path is not None:
            service = self.load_services(env_services_path, global_config, service)

        # Likewise, tunables included by the environment are merged into a copy
        # of the given collection.
        env_tunables_path = config.get("include_tunables")
        if env_tunables_path is not None:
            tunables = self.load_tunables(env_tunables_path, tunables)

        _LOG.debug("Creating env: %s :: %s", env_name, env_class)
        env = Environment.new(
            env_name=env_name,
            class_name=env_class,
            config=env_config,
            global_config=global_config,
            tunables=tunables,
            service=service,
        )

        _LOG.info("Created env: %s :: %s", env_name, env)
        return env

    def _build_standalone_service(
        self,
        config: dict[str, Any],
        global_config: dict[str, Any] | None = None,
        parent: Service | None = None,
    ) -> Service:
        """
        Factory method for a new service with a given config.

        Parameters
        ----------
        config : dict
            A dictionary with two mandatory fields:
                "class": FQN of a Python class to instantiate;
                "config": Free-format dictionary to pass to the constructor.
        global_config : dict
            Global parameters to add to the service config.
        parent: Service
            An optional reference of the parent service to mix in.

        Returns
        -------
        svc : Service
            An instance of the `Service` class initialized with `config`.
        """
        (svc_class, svc_config) = self.prepare_class_load(config, global_config)
        service = Service.new(svc_class, svc_config, global_config, parent)
        _LOG.info("Created service: %s", service)
        return service

    def _build_composite_service(
        self,
        config_list: Iterable[dict[str, Any]],
        global_config: dict[str, Any] | None = None,
        parent: Service | None = None,
    ) -> Service:
        """
        Factory method for a new service that combines several service configs.

        Parameters
        ----------
        config_list : a list of dict
            A list where each element is a dictionary with 2 mandatory fields:
                "class": FQN of a Python class to instantiate;
                "config": Free-format dictionary to pass to the constructor.
        global_config : dict
            Global parameters to add to the service config.
        parent: Service
            An optional reference of the parent service to mix in.

        Returns
        -------
        svc : Service
            An instance of the `Service` class that is a combination of all
            services from the list plus the parent mix-in.
        """
        service = Service()
        if parent:
            service.register(parent.export())

        # Each new service gets the composite built so far as its parent,
        # and then its exported methods are registered with the composite.
        for config in config_list:
            service.register(
                self._build_standalone_service(config, global_config, service).export()
            )

        if _LOG.isEnabledFor(logging.DEBUG):
            _LOG.debug("Created mix-in service: %s", service)

        return service

    def build_service(
        self,
        config: dict[str, Any],
        global_config: dict[str, Any] | None = None,
        parent: Service | None = None,
    ) -> Service:
        """
        Factory method for a new service with a given config.

        Parameters
        ----------
        config : dict
            Either a single service config, i.e., a dictionary with 2 mandatory fields:
                "class": FQN of a Python class to instantiate;
                "config": Free-format dictionary to pass to the constructor.
            or a dictionary with a "services" field that is a list of such configs.
        global_config : dict
            Global parameters to add to the service config.
        parent: Service
            An optional reference of the parent service to mix in.

        Returns
        -------
        svc : Service
            An instance of the `Service` class that is a combination of all
            services from the list plus the parent mix-in.
        """
        if _LOG.isEnabledFor(logging.DEBUG):
            _LOG.debug("Build service from config:\n%s", json5.dumps(config, indent=2))

        assert isinstance(config, dict)
        config_list: list[dict[str, Any]]
        if "class" not in config:
            # Top level config is a simple object with a list of services
            config_list = config["services"]
        else:
            # Top level config is a single service
            if parent is None:
                # Nothing to mix in, so there is no need for a composite wrapper.
                return self._build_standalone_service(config, global_config)
            config_list = [config]

        return self._build_composite_service(config_list, global_config, parent)

    def load_environment(
        self,
        json: str,
        tunables: TunableGroups,
        global_config: dict[str, Any] | None = None,
        parent_args: dict[str, TunableValue] | None = None,
        service: Service | None = None,
    ) -> Environment:
        # pylint: disable=too-many-arguments,too-many-positional-arguments
        """
        Load and build new :py:class:`.Environment` from the config file or JSON string.

        Parameters
        ----------
        json : str
            The environment JSON configuration file or JSON string.
        tunables : TunableGroups
            A (possibly empty) collection of tunables to add to the environment.
        global_config : dict
            Global parameters to add to the environment config.
        parent_args : dict[str, TunableValue]
            An optional reference of the parent CompositeEnv's const_args used to
            expand dynamic config parameters from.
        service : Service
            An optional reference of the parent service to mix in.

        Returns
        -------
        env : Environment
            A new benchmarking environment.
        """
        config = self.load_config(json, ConfigSchema.ENVIRONMENT)
        assert isinstance(config, dict)
        return self.build_environment(config, tunables, global_config, parent_args, service)

    def load_environment_list(
        self,
        json: str,
        tunables: TunableGroups,
        global_config: dict[str, Any] | None = None,
        parent_args: dict[str, TunableValue] | None = None,
        service: Service | None = None,
    ) -> list[Environment]:
        # pylint: disable=too-many-arguments,too-many-positional-arguments
        """
        Load and build a list of Environments from the config file or JSON string.

        Parameters
        ----------
        json : str
            The environment JSON configuration file or a JSON string.
            Can contain either one environment or a list of environments.
        tunables : TunableGroups
            A (possibly empty) collection of tunables to add to the environment.
        global_config : dict
            Global parameters to add to the environment config.
        parent_args : dict[str, TunableValue]
            An optional reference of the parent CompositeEnv's const_args used to
            expand dynamic config parameters from.
        service : Service
            An optional reference of the parent service to mix in.

        Returns
        -------
        env : list[Environment]
            A list of new benchmarking environments.
        """
        config: Any = self.load_config(json, ConfigSchema.ENVIRONMENT)
        # A single environment config is treated as a list of one element, so that
        # both forms promised in the docstring are handled the same way.
        env_configs = config if isinstance(config, list) else [config]
        return [
            self.build_environment(env_config, tunables, global_config, parent_args, service)
            for env_config in env_configs
        ]

    def load_services(
        self,
        jsons: Iterable[str],
        global_config: dict[str, Any] | None = None,
        parent: Service | None = None,
    ) -> Service:
        """
        Read the configuration files or JSON strings and bundle all Service methods from
        those configs into a single Service object.

        Notes
        -----
        Order of the services in the list matters. If multiple Services export the
        same method, the last one in the list will be used.

        Parameters
        ----------
        jsons : list of str
            A list of service JSON configuration files or JSON strings.
        global_config : dict
            Global parameters to add to the service config.
        parent : Service
            An optional reference of the parent service to mix in.

        Returns
        -------
        service : Service
            A collection of service methods.
        """
        _LOG.info("Load services: %s parent: %s", jsons, parent.__class__.__name__)
        service = Service({}, global_config, parent)
        # Each loaded service is built with the bundle so far as its parent,
        # and then its exported methods are registered with the bundle.
        for json in jsons:
            config = self.load_config(json, ConfigSchema.SERVICE)
            service.register(self.build_service(config, global_config, service).export())
        return service

    def load_tunables(
        self,
        jsons: Iterable[str],
        parent: TunableGroups | None = None,
    ) -> TunableGroups:
        """
        Load a collection of tunable parameters from JSON files or strings and merge
        them into a copy of the parent TunableGroups.

        This helps allow standalone environment configs to reference
        overlapping tunable groups configs but still allow combining them into
        a single instance that each environment can reference.

        Parameters
        ----------
        jsons : list of str
            A list of JSON files or JSON strings to load.
        parent : TunableGroups
            A (possibly empty) collection of tunables to add to the new collection.

        Returns
        -------
        tunables : TunableGroups
            The larger collection of tunable parameters.
        """
        _LOG.info("Load tunables: '%s'", jsons)
        if parent is None:
            parent = TunableGroups()
        # Work on a copy, so the loaded configs get merged into the new collection.
        tunables = parent.copy()
        for json in jsons:
            config = self.load_config(json, ConfigSchema.TUNABLE_PARAMS)
            assert isinstance(config, dict)
            tunables.merge(TunableGroups(config))
        return tunables