Coverage for mlos_bench/mlos_bench/services/config_persistence.py: 95%

169 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-21 01:50 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5"""Helper functions to load, instantiate, and serialize Python objects that encapsulate 

6a benchmark :py:class:`.Environment`, :py:mod:`~mlos_bench.tunables`, 

7:py:class:`.Service` functions, etc from JSON configuration files and strings. 

8""" 

9 

10import logging 

11import os 

12from collections.abc import Callable, Iterable 

13from importlib.resources import files 

14from typing import TYPE_CHECKING, Any 

15 

16import json5 # To read configs with comments and other JSON5 syntax features 

17from jsonschema import SchemaError, ValidationError 

18 

19from mlos_bench.config.schemas.config_schemas import ConfigSchema 

20from mlos_bench.environments.base_environment import Environment 

21from mlos_bench.optimizers.base_optimizer import Optimizer 

22from mlos_bench.services.base_service import Service 

23from mlos_bench.services.types.config_loader_type import SupportsConfigLoading 

24from mlos_bench.tunables.tunable import TunableValue 

25from mlos_bench.tunables.tunable_groups import TunableGroups 

26from mlos_bench.util import ( 

27 instantiate_from_config, 

28 merge_parameters, 

29 path_join, 

30 preprocess_dynamic_configs, 

31) 

32 

33if TYPE_CHECKING: 

34 from mlos_bench.schedulers.base_scheduler import Scheduler 

35 from mlos_bench.schedulers.trial_runner import TrialRunner 

36 from mlos_bench.storage.base_storage import Storage 

37 

38 

39_LOG = logging.getLogger(__name__) 

40 

41 

42class ConfigPersistenceService(Service, SupportsConfigLoading): 

43 """Collection of methods to deserialize the Environment, Service, and TunableGroups 

44 objects. 

45 """ 

46 

47 BUILTIN_CONFIG_PATH = str(files("mlos_bench.config").joinpath("")).replace("\\", "/") 

48 """A calculated path to the built-in configuration files shipped with the mlos_bench 

49 package. 

50 """ 

51 

def __init__(
    self,
    config: dict[str, Any] | None = None,
    global_config: dict[str, Any] | None = None,
    parent: Service | None = None,
    methods: dict[str, Callable] | list[Callable] | None = None,
):
    """
    Create a new instance of config persistence service.

    Besides delegating to the base ``Service`` constructor, this builds the
    ordered search path used by :py:meth:`.resolve_path`:
    the current working directory first, then the user supplied
    ``config_path`` entries, then the built-in config path last.

    Parameters
    ----------
    config : dict
        Free-format dictionary that contains parameters for the service.
        The optional ``"config_path"`` entry is a list of directories to search
        for config files.
    global_config : dict
        Free-format dictionary of global parameters.
    parent : Service
        An optional parent service that can provide mixin functions.
    methods : dict[str, Callable] | list[Callable] | None
        New methods to register with the service
        (merged with the config loading methods of this class).
    """
    super().__init__(
        config,
        global_config,
        parent,
        self.merge_methods(
            methods,
            [
                self.get_config_paths,
                self.resolve_path,
                self.load_config,
                self.prepare_class_load,
                self.build_service,
                self.build_environment,
                self.load_services,
                self.load_environment,
                self.load_environment_list,
            ],
        ),
    )
    # This service is its own config loader.
    self._config_loader_service = self

    # Normalize and deduplicate config paths, but maintain order.
    # The membership test must be done on the *normalized* path: the list only
    # ever holds normalized entries, so testing the raw input would let two
    # spellings of the same directory (e.g., "./x" and "x") both get in.
    self._config_path: list[str] = []
    for path in self.config.get("config_path", []):
        norm_path = path_join(path, abs_path=True)
        if norm_path not in self._config_path:
            self._config_path.append(norm_path)
    # Prepend the cwd if not already on the list.
    cwd = path_join(os.getcwd(), abs_path=True)
    if cwd not in self._config_path:
        self._config_path.insert(0, cwd)
    # Append the built-in config path if not already on the list.
    if self.BUILTIN_CONFIG_PATH not in self._config_path:
        self._config_path.append(self.BUILTIN_CONFIG_PATH)

107 

@property
def config_paths(self) -> list[str]:
    """
    The directories this service searches when resolving config files.

    Returns
    -------
    list[str]
        A fresh list with the search paths, in search order.
        Changing the returned list does not affect the service.
    """
    # Unpack into a new list so that callers never get the internal one.
    return [*self._config_path]

118 

def get_config_paths(self) -> list[str]:
    """
    Method form of the :py:attr:`.config_paths` property.

    Unlike the property, a bound method can be included in the list of methods
    this service registers in its constructor.

    Returns
    -------
    list[str]
        Whatever :py:attr:`.config_paths` returns.
    """
    search_paths = self.config_paths
    return search_paths

128 

def resolve_path(self, file_path: str, extra_paths: Iterable[str] | None = None) -> str:
    """
    Find ``file_path`` in one of the search directories.

    An absolute ``file_path`` is returned unchanged. A relative one is joined
    with each search directory in turn (``extra_paths`` first, then the
    service's own config paths) and the first candidate that exists on disk
    wins. If no candidate exists, ``file_path`` is returned unchanged.

    Parameters
    ----------
    file_path : str
        Path to the input config file.
    extra_paths : Iterable[str]
        Additional directories to search before the service's own
        config paths (optional).

    Returns
    -------
    path : str
        The resolved path, or the original ``file_path`` if it is absolute
        or could not be found.
    """
    search_dirs = [*(extra_paths or []), *self._config_path]
    _LOG.debug("Resolve path: %s in: %s", file_path, search_dirs)

    if os.path.isabs(file_path):
        _LOG.debug("Path is absolute: %s", file_path)
        return file_path

    for base_dir in search_dirs:
        candidate = path_join(base_dir, file_path, abs_path=True)
        if not os.path.exists(candidate):
            continue
        _LOG.debug("Path resolved: %s", candidate)
        return candidate

    # Nothing matched: hand the input back and let the caller deal with it.
    _LOG.debug("Path not resolved: %s", file_path)
    return file_path

160 

def load_config(
    self,
    json: str,
    schema_type: ConfigSchema | None,
) -> dict[str, Any]:
    """
    Parse a JSON5 config given either as a file path or as an inline string, and
    optionally validate it.

    A ``json`` argument that contains ``{`` or ``[`` is parsed directly as
    JSON5 text. Anything else is treated as a file path, resolved with
    :py:meth:`.resolve_path`, and read as UTF-8.

    When ``schema_type`` is given, the parsed config is validated against it
    and a truthy top-level ``"$schema"`` entry is then removed from a dict
    config. Without a ``schema_type`` a warning is logged instead.

    Parameters
    ----------
    json : str
        Path to the input config file or a JSON string.
    schema_type : ConfigSchema | None
        The schema type to validate the config against.

    Returns
    -------
    config : dict | list[dict]
        The parsed configuration.

    Raises
    ------
    ValueError
        If an inline JSON string cannot be parsed, or if the config fails
        the schema validation. Errors from opening or parsing a config
        *file* are not wrapped here.
    """
    assert isinstance(json, str)
    # A brace or a bracket in the input means it is likely already a json
    # string rather than a path, so just parse it.
    is_inline_json = "{" in json or "[" in json
    if is_inline_json:
        _LOG.info("Load config from json string: %s", json)
        try:
            config: Any = json5.loads(json)
        except ValueError as ex:
            _LOG.error("Failed to parse config from JSON string: %s", json)
            raise ValueError(f"Failed to parse config from JSON string: {json}") from ex
    else:
        json = self.resolve_path(json)
        _LOG.info("Load config file: %s", json)
        with open(json, encoding="utf-8") as fh_json:
            config = json5.load(fh_json)

    if schema_type is None:
        _LOG.warning("Config %s is not validated against a schema.", json)
        return config  # type: ignore[no-any-return]

    try:
        schema_type.validate(config)
    except (ValidationError, SchemaError) as ex:
        _LOG.error(
            "Failed to validate config %s against schema type %s at %s",
            json,
            schema_type.name,
            schema_type.value,
        )
        raise ValueError(
            f"Failed to validate config {json} against "
            f"schema type {schema_type.name} at {schema_type.value}"
        ) from ex

    has_schema_attr = isinstance(config, dict) and config.get("$schema")
    if has_schema_attr:
        # Remove $schema attributes from the config after we've validated
        # them to avoid passing them on to other objects
        # (e.g. SqlAlchemy based storage initializers).
        # NOTE: we only do this for internal schemas.
        # Other configs that get loaded may need the schema field
        # (e.g. Azure ARM templates).
        del config["$schema"]
    return config  # type: ignore[no-any-return]

224 

def prepare_class_load(
    self,
    config: dict[str, Any],
    global_config: dict[str, Any] | None = None,
    parent_args: dict[str, TunableValue] | None = None,
) -> tuple[str, dict[str, Any]]:
    """
    Get the class name and the constructor config out of a loaded config.

    The ``"config"`` section of the input is updated in place (and created
    as an empty dict if it is missing): global parameters are merged into it,
    and the properties named in ``"resolve_config_property_paths"`` are
    replaced with resolved file system paths.

    Parameters
    ----------
    config : dict
        Configuration of the object to instantiate. Must have a ``"class"``
        entry.
    global_config : dict
        Global configuration parameters (optional).
    parent_args : dict[str, TunableValue]
        An optional reference of the parent CompositeEnv's const_args used to
        expand dynamic config parameters from.

    Returns
    -------
    (class_name, class_config) : (str, dict)
        Name of the class to instantiate and its configuration.

    Raises
    ------
    ValueError
        If a property listed for path resolution is neither a string nor a
        list/tuple.
    """
    class_name = config["class"]
    class_config = config.setdefault("config", {})

    # Replace any appearance of "$param_name" in the const_arg values with
    # the value from the parent CompositeEnv.
    # Note: we could consider expanding this feature to additional config
    # sections in the future, but for now only use it in const_args.
    if class_name.startswith("mlos_bench.environments."):
        preprocess_dynamic_configs(
            dest=class_config.get("const_args", {}),
            source=parent_args,
        )

    merge_parameters(dest=class_config, source=global_config)

    # Only the properties that are both present and marked for resolution.
    keys_to_resolve = set(class_config).intersection(
        config.get("resolve_config_property_paths", [])
    )
    for key in keys_to_resolve:
        value = class_config[key]
        if isinstance(value, str):
            class_config[key] = self.resolve_path(value)
        elif isinstance(value, (list, tuple)):
            class_config[key] = [self.resolve_path(item) for item in value]
        else:
            raise ValueError(f"Parameter {key} must be a string or a list")

    # Guard the call to avoid serializing the config when it is not logged.
    if _LOG.isEnabledFor(logging.DEBUG):
        _LOG.debug(
            "Instantiating: %s with config:\n%s",
            class_name,
            json5.dumps(class_config, indent=2),
        )

    return (class_name, class_config)

279 

def build_optimizer(
    self,
    *,
    tunables: TunableGroups,
    service: Service,
    config: dict[str, Any],
    global_config: dict[str, Any] | None = None,
) -> Optimizer:
    """
    Create an :py:class:`.Optimizer` from its JSON-loaded configuration.

    If the config has an ``"include_tunables"`` entry, those tunables are
    loaded on top of ``tunables`` before the optimizer is created.

    Parameters
    ----------
    tunables : TunableGroups
        Tunable parameters of the environment. We need them to validate the
        configurations of merged-in experiments and restored/pending trials.
    service: Service
        A service object (e.g., providing methods to load config files, etc.)
    config : dict
        Configuration of the class to instantiate, as loaded from JSON.
    global_config : dict
        Global configuration parameters (optional).

    Returns
    -------
    inst : Optimizer
        A new instance of the `Optimizer` class.
    """
    if (extra_tunables := config.get("include_tunables")) is not None:
        tunables = self.load_tunables(extra_tunables, tunables)

    opt_class, opt_config = self.prepare_class_load(config, global_config)
    optimizer = instantiate_from_config(
        Optimizer,  # type: ignore[type-abstract]
        opt_class,
        tunables=tunables,
        config=opt_config,
        global_config=global_config,
        service=service,
    )
    _LOG.info("Created: Optimizer %s", optimizer)
    return optimizer

323 

def build_storage(
    self,
    *,
    service: Service,
    config: dict[str, Any],
    global_config: dict[str, Any] | None = None,
) -> "Storage":
    """
    Create a :py:class:`.Storage` object from its JSON-loaded configuration.

    Parameters
    ----------
    service: Service
        A service object (e.g., providing methods to load config files, etc.)
    config : dict
        Configuration of the class to instantiate, as loaded from JSON.
    global_config : dict
        Global configuration parameters (optional).

    Returns
    -------
    inst : Storage
        A new instance of the Storage class.
    """
    storage_class, storage_config = self.prepare_class_load(config, global_config)

    # The Storage base class is imported at module level for type checking
    # only, so import it here for use at runtime.
    # pylint: disable=import-outside-toplevel
    from mlos_bench.storage.base_storage import Storage

    storage = instantiate_from_config(
        Storage,  # type: ignore[type-abstract]
        storage_class,
        config=storage_config,
        global_config=global_config,
        service=service,
    )
    _LOG.info("Created: Storage %s", storage)
    return storage

361 

def build_scheduler(  # pylint: disable=too-many-arguments
    self,
    *,
    config: dict[str, Any],
    global_config: dict[str, Any],
    trial_runners: list["TrialRunner"],
    optimizer: Optimizer,
    storage: "Storage",
    root_env_config: str,
) -> "Scheduler":
    """
    Create a :py:class:`.Scheduler` from its JSON-loaded configuration.

    Parameters
    ----------
    config : dict
        Configuration of the class to instantiate, as loaded from JSON.
    global_config : dict
        Global configuration parameters.
    trial_runners : List[TrialRunner]
        The TrialRunners (Environments) to use.
    optimizer : Optimizer
        The optimizer to use.
    storage : Storage
        The storage to use.
    root_env_config : str
        Path to the root environment configuration.

    Returns
    -------
    inst : Scheduler
        A new instance of the Scheduler.
    """
    sched_class, sched_config = self.prepare_class_load(config, global_config)

    # The Scheduler base class is imported at module level for type checking
    # only, so import it here for use at runtime.
    # pylint: disable=import-outside-toplevel
    from mlos_bench.schedulers.base_scheduler import Scheduler

    scheduler = instantiate_from_config(
        Scheduler,  # type: ignore[type-abstract]
        sched_class,
        config=sched_config,
        global_config=global_config,
        trial_runners=trial_runners,
        optimizer=optimizer,
        storage=storage,
        root_env_config=root_env_config,
    )
    _LOG.info("Created: Scheduler %s", scheduler)
    return scheduler

411 

def build_environment(
    self,
    config: dict[str, Any],
    tunables: TunableGroups,
    global_config: dict[str, Any] | None = None,
    parent_args: dict[str, TunableValue] | None = None,
    service: Service | None = None,
) -> Environment:
    # pylint: disable=too-many-arguments,too-many-positional-arguments
    """
    Create a new :py:class:`.Environment` from an already loaded config.

    Optional ``"include_services"`` and ``"include_tunables"`` entries of the
    config are loaded first and layered on top of ``service`` and
    ``tunables`` respectively.

    Parameters
    ----------
    config : dict
        A dictionary with three mandatory fields:
            "name": Human-readable string describing the environment;
            "class": FQN of a Python class to instantiate;
            "config": Free-format dictionary to pass to the constructor.
    tunables : TunableGroups
        A (possibly empty) collection of groups of tunable parameters for
        all environments.
    global_config : dict
        Global parameters to add to the environment config.
    parent_args : dict[str, TunableValue]
        An optional reference of the parent CompositeEnv's const_args used to
        expand dynamic config parameters from.
    service: Service
        An optional service object (e.g., providing methods to
        deploy or reboot a VM, etc.).

    Returns
    -------
    env : Environment
        An instance of the ``Environment`` class initialized with ``config``.
    """
    env_name = config["name"]
    env_class, env_config = self.prepare_class_load(config, global_config, parent_args)

    if (services_src := config.get("include_services")) is not None:
        service = self.load_services(services_src, global_config, service)

    if (tunables_src := config.get("include_tunables")) is not None:
        tunables = self.load_tunables(tunables_src, tunables)

    _LOG.debug("Creating env: %s :: %s", env_name, env_class)
    new_env = Environment.new(
        env_name=env_name,
        class_name=env_class,
        config=env_config,
        global_config=global_config,
        tunables=tunables,
        service=service,
    )
    _LOG.info("Created env: %s :: %s", env_name, new_env)
    return new_env

471 

def _build_standalone_service(
    self,
    config: dict[str, Any],
    global_config: dict[str, Any] | None = None,
    parent: Service | None = None,
) -> Service:
    """
    Create a single service from its config.

    Parameters
    ----------
    config : dict
        A dictionary with two mandatory fields:
            "class": FQN of a Python class to instantiate;
            "config": Free-format dictionary to pass to the constructor.
    global_config : dict
        Global parameters to add to the service config.
    parent: Service
        An optional reference of the parent service to mix in.

    Returns
    -------
    svc : Service
        An instance of the `Service` class initialized with `config`.
    """
    svc_class, svc_config = self.prepare_class_load(config, global_config)
    new_service = Service.new(svc_class, svc_config, global_config, parent)
    _LOG.info("Created service: %s", new_service)
    return new_service

501 

def _build_composite_service(
    self,
    config_list: Iterable[dict[str, Any]],
    global_config: dict[str, Any] | None = None,
    parent: Service | None = None,
) -> Service:
    """
    Create one mix-in service out of a list of service configs.

    The methods exported by the parent (if any) are registered first, then
    each service from the list is built in order and its exported methods
    are registered on the same mix-in.

    Parameters
    ----------
    config_list : a list of dict
        A list where each element is a dictionary with 2 mandatory fields:
            "class": FQN of a Python class to instantiate;
            "config": Free-format dictionary to pass to the constructor.
    global_config : dict
        Global parameters to add to the service config.
    parent: Service
        An optional reference of the parent service to mix in.

    Returns
    -------
    svc : Service
        An instance of the `Service` class that is a combination of all
        services from the list plus the parent mix-in.
    """
    mixin = Service()
    if parent:
        service_methods = parent.export()
        mixin.register(service_methods)

    for svc_config in config_list:
        # Each new service gets the mix-in built so far as its parent.
        child = self._build_standalone_service(svc_config, global_config, mixin)
        mixin.register(child.export())

    if _LOG.isEnabledFor(logging.DEBUG):
        _LOG.debug("Created mix-in service: %s", mixin)

    return mixin

541 

def build_service(
    self,
    config: dict[str, Any],
    global_config: dict[str, Any] | None = None,
    parent: Service | None = None,
) -> Service:
    """
    Create a service from a loaded config that describes either a single
    service or a list of services.

    A config with a ``"class"`` entry describes one service; otherwise its
    ``"services"`` entry must hold the list of service configs. A single
    service without a parent is created standalone; every other case
    produces a composite mix-in service.

    Parameters
    ----------
    config : dict
        Either a dictionary with 2 mandatory fields:
            "class": FQN of a Python class to instantiate;
            "config": Free-format dictionary to pass to the constructor;
        or a dictionary with a "services" list of such dictionaries.
    global_config : dict
        Global parameters to add to the service config.
    parent: Service
        An optional reference of the parent service to mix in.

    Returns
    -------
    svc : Service
        An instance of the `Service` class that is a combination of all
        services from the list plus the parent mix-in.
    """
    if _LOG.isEnabledFor(logging.DEBUG):
        _LOG.debug("Build service from config:\n%s", json5.dumps(config, indent=2))

    assert isinstance(config, dict)
    config_list: list[dict[str, Any]]
    if "class" in config:
        # Top level config is a single service
        if parent is None:
            return self._build_standalone_service(config, global_config)
        config_list = [config]
    else:
        # Top level config is a simple object with a list of services
        config_list = config["services"]

    return self._build_composite_service(config_list, global_config, parent)

583 

def load_environment(
    self,
    json: str,
    tunables: TunableGroups,
    global_config: dict[str, Any] | None = None,
    parent_args: dict[str, TunableValue] | None = None,
    service: Service | None = None,
) -> Environment:
    # pylint: disable=too-many-arguments,too-many-positional-arguments
    """
    Read an environment config (validated against the environment schema) and
    build the :py:class:`.Environment` it describes.

    Parameters
    ----------
    json : str
        The environment JSON configuration file or JSON string.
    tunables : TunableGroups
        A (possibly empty) collection of tunables to add to the environment.
    global_config : dict
        Global parameters to add to the environment config.
    parent_args : dict[str, TunableValue]
        An optional reference of the parent CompositeEnv's const_args used to
        expand dynamic config parameters from.
    service : Service
        An optional reference of the parent service to mix in.

    Returns
    -------
    env : Environment
        A new benchmarking environment.
    """
    env_config = self.load_config(json, ConfigSchema.ENVIRONMENT)
    assert isinstance(env_config, dict)
    return self.build_environment(
        env_config,
        tunables,
        global_config,
        parent_args,
        service,
    )

618 

def load_environment_list(
    self,
    json: str,
    tunables: TunableGroups,
    global_config: dict[str, Any] | None = None,
    parent_args: dict[str, TunableValue] | None = None,
    service: Service | None = None,
) -> list[Environment]:
    # pylint: disable=too-many-arguments,too-many-positional-arguments
    """
    Load and build a list of Environments from the config file or JSON string.

    Parameters
    ----------
    json : str
        The environment JSON configuration file or a JSON string.
        Can contain either one environment or a list of environments.
    tunables : TunableGroups
        A (possibly empty) collection of tunables to add to the environment.
    global_config : dict
        Global parameters to add to the environment config.
    parent_args : dict[str, TunableValue]
        An optional reference of the parent CompositeEnv's const_args used to
        expand dynamic config parameters from.
    service : Service
        An optional reference of the parent service to mix in.

    Returns
    -------
    env : list[Environment]
        A list of new benchmarking environments, one per config entry
        (a single-element list if the config is a single environment).
    """
    config = self.load_config(json, ConfigSchema.ENVIRONMENT)
    # Honor the documented contract: a top-level list yields one Environment
    # per entry rather than failing on the list in build_environment.
    # NOTE(review): assumes the environment schema can let a top-level list
    # through - confirm; a single dict config behaves exactly as before.
    config_list = config if isinstance(config, list) else [config]
    return [
        self.build_environment(env_config, tunables, global_config, parent_args, service)
        for env_config in config_list
    ]

653 

def load_services(
    self,
    jsons: Iterable[str],
    global_config: dict[str, Any] | None = None,
    parent: Service | None = None,
) -> Service:
    """
    Load several service configs and combine the methods of all the services
    they describe into one Service object.

    Notes
    -----
    Order of the services in the list matters. If multiple Services export the
    same method, the last one in the list will be used.

    Parameters
    ----------
    jsons : list of str
        A list of service JSON configuration files or JSON strings.
    global_config : dict
        Global parameters to add to the service config.
    parent : Service
        An optional reference of the parent service to mix in.

    Returns
    -------
    service : Service
        A collection of service methods.
    """
    _LOG.info("Load services: %s parent: %s", jsons, parent.__class__.__name__)
    combined = Service({}, global_config, parent)
    for json_src in jsons:
        svc_config = self.load_config(json_src, ConfigSchema.SERVICE)
        # Each service is built with the combined service so far as its parent.
        built = self.build_service(svc_config, global_config, combined)
        combined.register(built.export())
    return combined

689 

def load_tunables(
    self,
    jsons: Iterable[str],
    parent: TunableGroups | None = None,
) -> TunableGroups:
    """
    Load tunable parameters from a list of JSON files or strings and merge
    them into a copy of the parent TunableGroups.

    This helps allow standalone environment configs to reference
    overlapping tunable groups configs but still allow combining them into
    a single instance that each environment can reference.

    Parameters
    ----------
    jsons : list of str
        A list of JSON files or JSON strings to load.
    parent : TunableGroups
        A (possibly empty) collection of tunables to add to the new collection.
        A new empty collection is used if omitted.

    Returns
    -------
    tunables : TunableGroups
        The larger collection of tunable parameters.
    """
    _LOG.info("Load tunables: '%s'", jsons)
    base_groups = TunableGroups() if parent is None else parent
    # Merge into a copy rather than into the parent collection itself.
    merged = base_groups.copy()
    for json_src in jsons:
        params_config = self.load_config(json_src, ConfigSchema.TUNABLE_PARAMS)
        assert isinstance(params_config, dict)
        merged.merge(TunableGroups(params_config))
    return merged