Coverage for mlos_bench/mlos_bench/config/schemas/config_schemas.py: 92%
72 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-10-07 01:52 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-10-07 01:52 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""A simple class for describing where to find different config schemas and validating
6configs against them.
7"""
9import json # schema files are pure json - no comments
10import logging
11from enum import Enum
12from os import environ, path, walk
13from typing import Dict, Iterator, Mapping
15import jsonschema
16from referencing import Registry, Resource
17from referencing.jsonschema import DRAFT202012
19from mlos_bench.util import path_join
# Logger for this module.
_LOG = logging.getLogger(__name__)

# Where all of the config schemas can be found: derived from the directory
# that contains this module.
CONFIG_SCHEMA_DIR = path_join(path.dirname(__file__), abs_path=True)
# Schema validation can be switched off to shorten tight dev cycles.
# The flag is consulted by the `ConfigSchema.validate()` method below.
# NOTE: with the flag set, pytest may fail wherever it expects exceptions
# to be raised for invalid configs.
_VALIDATION_ENV_FLAG = "MLOS_BENCH_SKIP_SCHEMA_VALIDATION"
# The environment is read once, at import time; the comparison is
# case-insensitive and an unset variable counts as "false".
_SKIP_VALIDATION = environ.get(_VALIDATION_ENV_FLAG, "false").lower() in frozenset(
    ("true", "y", "yes", "on", "1")
)
40# Note: we separate out the SchemaStore from a class method on ConfigSchema
41# because of issues with mypy/pylint and non-Enum-member class members.
class SchemaStore(Mapping):
    """A lazily populated, read-only mapping of schemas and subschemas for the
    validator to reference.

    Every non-empty ``*.json`` file found under ``CONFIG_SCHEMA_DIR`` is stored
    twice: once under its file path and once under its ``$id``. The data is kept
    in class-level attributes, so all instances share the same store.
    """

    # A class member mapping of schema id (or schema file path) to schema object.
    _SCHEMA_STORE: Dict[str, dict] = {}
    # A class member registry holding the same schemas; filled in on first use.
    _REGISTRY: Registry = Registry()

    def __len__(self) -> int:
        """Gets the number of keys in the store, loading the schemas if needed."""
        # Load lazily here as well, so that len() agrees with __getitem__()
        # even when it is the first operation performed on the store.
        self._load_schemas()
        return len(self._SCHEMA_STORE)

    def __iter__(self) -> Iterator:
        """Iterates over the keys in the store, loading the schemas if needed."""
        # Same lazy load as __getitem__(): without it, iteration (and the
        # Mapping mixins built on it, e.g. keys()/items()) would see an empty
        # store until some item had been looked up.
        self._load_schemas()
        return iter(self._SCHEMA_STORE)

    def __getitem__(self, key: str) -> dict:
        """
        Gets the schema object for the given key.

        Parameters
        ----------
        key : str
            Either the path of a schema file or the ``$id`` of a schema.

        Returns
        -------
        schema : dict
            The parsed schema.

        Raises
        ------
        KeyError
            If no schema is stored under the given key.
        """
        self._load_schemas()
        return self._SCHEMA_STORE[key]

    @classmethod
    def _load_schemas(cls) -> None:
        """Loads all schemas and subschemas into the schema store for the validator to
        reference.

        Does nothing if the store is already populated.

        Raises
        ------
        AssertionError
            If a schema file has no ``$id`` or its ``$id`` is already in use.
        """
        if cls._SCHEMA_STORE:
            return
        # Collect into a local dict first and publish only once everything has
        # loaded: if a file fails to parse or an assertion trips midway, the
        # shared store stays empty rather than being left half-filled and then
        # mistaken for fully loaded by the guard above.
        loaded: Dict[str, dict] = {}
        for root, _, files in walk(CONFIG_SCHEMA_DIR):
            for file_name in files:
                if not file_name.endswith(".json"):
                    continue
                file_path = path_join(root, file_name)
                # Skip empty placeholder files - they are not valid json.
                if path.getsize(file_path) == 0:
                    continue
                with open(file_path, mode="r", encoding="utf-8") as schema_file:
                    schema = json.load(schema_file)
                loaded[file_path] = schema
                # Let the schema be referenced by its id as well.
                assert "$id" in schema, f"Schema file has no $id: {file_path}"
                assert schema["$id"] not in loaded, f"Duplicate schema $id in {file_path}"
                loaded[schema["$id"]] = schema
        # Update in place so the class-level dict object stays the same one.
        cls._SCHEMA_STORE.update(loaded)

    @classmethod
    def _load_registry(cls) -> None:
        """Also store them in a Registry object for referencing by recent versions of
        jsonschema.

        Every key of the schema store (file paths and ``$id`` values alike) is
        registered as a resource, with Draft 2020-12 as the default specification.
        """
        if not cls._SCHEMA_STORE:
            cls._load_schemas()
        cls._REGISTRY = Registry().with_resources(
            [
                (url, Resource.from_contents(schema, default_specification=DRAFT202012))
                for url, schema in cls._SCHEMA_STORE.items()
            ]
        )

    @property
    def registry(self) -> Registry:
        """Returns a Registry object with all the schemas loaded."""
        # NOTE(review): relies on an empty Registry being falsy - confirm
        # against the `referencing` documentation.
        if not self._REGISTRY:
            self._load_registry()
        return self._REGISTRY
# The module-level schema store instance used by `ConfigSchema` below.
SCHEMA_STORE: SchemaStore = SchemaStore()
class ConfigSchema(Enum):
    """Enumerates the known config schema types and validates configs against them.

    The value of each member is the path of its schema file, which is also the
    key used to look the schema up in ``SCHEMA_STORE``.
    """

    # Schemas for the individual config types.
    CLI = path_join(CONFIG_SCHEMA_DIR, "cli/cli-schema.json")
    GLOBALS = path_join(CONFIG_SCHEMA_DIR, "cli/globals-schema.json")
    ENVIRONMENT = path_join(CONFIG_SCHEMA_DIR, "environments/environment-schema.json")
    OPTIMIZER = path_join(CONFIG_SCHEMA_DIR, "optimizers/optimizer-schema.json")
    SCHEDULER = path_join(CONFIG_SCHEMA_DIR, "schedulers/scheduler-schema.json")
    SERVICE = path_join(CONFIG_SCHEMA_DIR, "services/service-schema.json")
    STORAGE = path_join(CONFIG_SCHEMA_DIR, "storage/storage-schema.json")
    TUNABLE_PARAMS = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-params-schema.json")
    TUNABLE_VALUES = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-values-schema.json")

    # The schema file at the top of the schema directory.
    UNIFIED = path_join(CONFIG_SCHEMA_DIR, "mlos-bench-config-schema.json")

    @property
    def schema(self) -> dict:
        """Looks up the schema object that belongs to this member."""
        found = SCHEMA_STORE[self.value]
        assert found
        return found

    def validate(self, config: dict) -> None:
        """
        Checks the given config against the schema of this member.

        When the skip-validation flag is set, only a warning is logged and
        the config is not checked at all.

        Parameters
        ----------
        config : dict
            The config to validate.

        Raises
        ------
        jsonschema.exceptions.ValidationError
        jsonschema.exceptions.SchemaError
        """
        if _SKIP_VALIDATION:
            _LOG.warning("%s is set - skip schema validation", _VALIDATION_ENV_FLAG)
            return
        validator = jsonschema.Draft202012Validator(
            schema=self.schema,
            registry=SCHEMA_STORE.registry,
        )
        validator.validate(config)