Coverage for mlos_bench/mlos_bench/config/schemas/config_schemas.py: 93%

85 statements  

« prev     ^ index     » next       coverage.py v7.6.9, created at 2024-12-14 01:58 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5""" 

6A simple class for describing where to find different `json config schemas 

7<https://json-schema.org>`_ and validating configs against them. 

8 

9Used by the :py:class:`~mlos_bench.launcher.Launcher` and 

10:py:class:`~mlos_bench.services.config_persistence.ConfigPersistenceService` to 

11validate configs on load. 

12 

13Notes 

14----- 

15- See `mlos_bench/config/schemas/README.md 

16 <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/>`_ 

17 for additional documentation in the source tree. 

18 

19- See `mlos_bench/config/README.md 

20 <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/>`_ 

21 for additional config examples in the source tree. 

22""" 

23 

24import json # schema files are pure json - no comments 

25import logging 

26from enum import Enum 

27from os import environ, path, walk 

28from typing import Dict, Iterator, Mapping 

29 

30import jsonschema 

31from referencing import Registry, Resource 

32from referencing.jsonschema import DRAFT202012 

33 

34from mlos_bench.util import path_join 

35 

_LOG = logging.getLogger(__name__)

# Root directory under which all of the config schema files are looked up:
# the directory that contains this module.
CONFIG_SCHEMA_DIR = path_join(path.dirname(__file__), abs_path=True)
"""The local directory where all config schemas shipped as a part of the
:py:mod:`mlos_bench` module are stored.
"""

# Escape hatch for tight dev cycles: when this flag is set, the
# `ConfigSchema.validate()` method below skips schema validation.
# NOTE: this may cause pytest to fail if it's expecting exceptions
# to be raised for invalid configs.
VALIDATION_ENV_FLAG = "MLOS_BENCH_SKIP_SCHEMA_VALIDATION"
"""
The special environment flag to set to skip schema validation.

Validation is skipped when the (case-insensitive) value is one of
``true``, ``y``, ``yes``, ``on`` or ``1``.

Useful for local development when you're making a lot of changes to the config or adding
new classes that aren't in the main repo yet.
"""

# Evaluated once, at import time; an unset flag is treated as "false".
_SKIP_VALIDATION = environ.get(VALIDATION_ENV_FLAG, "false").lower() in (
    "true",
    "y",
    "yes",
    "on",
    "1",
)

63 

64 

65# Note: we separate out the SchemaStore from a class method on ConfigSchema 

66# because of issues with mypy/pylint and non-Enum-member class members. 

class SchemaStore(Mapping):
    """
    A lazily populated, read-only mapping of schemas and subschemas for the
    validator to reference.

    Each schema found under ``CONFIG_SCHEMA_DIR`` is stored under two keys: the
    path of the file it was loaded from and its ``$id``. The backing store is a
    class member, so it is shared by all instances and loaded at most once.
    """

    # A class member mapping of schema file path / schema id to schema object.
    _SCHEMA_STORE: Dict[str, dict] = {}
    # The same schemas wrapped as resources, built on first use of `registry`.
    _REGISTRY: Registry = Registry()

    def __len__(self) -> int:
        """Gets the number of keys in the store, loading the schemas if needed."""
        # Load lazily here too, so that len() agrees with __getitem__().
        self._load_schemas()
        return len(self._SCHEMA_STORE)

    def __iter__(self) -> Iterator[str]:
        """Iterates over the keys in the store, loading the schemas if needed."""
        # Load lazily here too, so that keys()/items() agree with __getitem__().
        self._load_schemas()
        return iter(self._SCHEMA_STORE)

    def __getitem__(self, key: str) -> dict:
        """
        Gets the schema object for the given key.

        Parameters
        ----------
        key : str
            Either the path of a schema file or the ``$id`` of a schema.

        Returns
        -------
        schema : dict
            The parsed json schema.

        Raises
        ------
        KeyError
            If no schema is stored under that key.
        """
        self._load_schemas()
        return self._SCHEMA_STORE[key]

    @classmethod
    def _load_schemas(cls) -> None:
        """
        Loads all schemas and subschemas into the schema store for the validator
        to reference.

        Does nothing if the store is already populated. Empty files and files
        without a ``.json`` extension are skipped.

        Raises
        ------
        AssertionError
            If a schema has no ``$id`` or its ``$id`` is already in use.
        """
        if cls._SCHEMA_STORE:
            return
        # Collect into a local dict first: a failure part way through must not
        # leave the shared store half populated, since the guard above would
        # then prevent it from ever being reloaded.
        loaded: Dict[str, dict] = {}
        for root, _, files in walk(CONFIG_SCHEMA_DIR):
            for file_name in files:
                if not file_name.endswith(".json"):
                    continue
                file_path = path_join(root, file_name)
                if path.getsize(file_path) == 0:
                    continue
                with open(file_path, mode="r", encoding="utf-8") as schema_file:
                    schema = json.load(schema_file)
                loaded[file_path] = schema
                # Let the schema be referenced by its id as well.
                assert "$id" in schema, f"Missing '$id' in schema file: {file_path}"
                schema_id = schema["$id"]
                assert (
                    schema_id not in loaded
                ), f"Duplicate schema key {schema_id!r} from file: {file_path}"
                loaded[schema_id] = schema
        # Update in place so the class level dict object stays the same.
        cls._SCHEMA_STORE.update(loaded)

    @classmethod
    def _load_registry(cls) -> None:
        """Also store them in a Registry object for referencing by recent versions
        of jsonschema.
        """
        cls._load_schemas()
        cls._REGISTRY = Registry().with_resources(
            [
                (url, Resource.from_contents(schema, default_specification=DRAFT202012))
                for url, schema in cls._SCHEMA_STORE.items()
            ]
        )

    @property
    def registry(self) -> Registry:
        """Returns a Registry object with all the schemas loaded."""
        # The class level default is an empty Registry; build it on first use.
        if not self._REGISTRY:
            self._load_registry()
        return self._REGISTRY

130 

131 

SCHEMA_STORE = SchemaStore()
"""The module-wide :py:class:`.SchemaStore` instance through which schemas are stored
and retrieved for config validation.
"""

136 

137 

class ConfigSchema(Enum):
    """
    Enumerates the known config schema types.

    The value of each member is the path to its json schema file, and each
    member can validate a config against that schema.
    """

    CLI = path_join(CONFIG_SCHEMA_DIR, "cli/cli-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/cli/cli-schema.json>`__
    for :py:mod:`mlos_bench <mlos_bench.run>` CLI configuration.

    See Also
    --------
    mlos_bench.config : documentation on the configuration system.
    mlos_bench.launcher.Launcher : class is responsible for processing the CLI args.
    """

    GLOBALS = path_join(CONFIG_SCHEMA_DIR, "cli/globals-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/cli/globals-schema.json>`__
    for :py:mod:`global variables <mlos_bench.config>`.
    """

    ENVIRONMENT = path_join(CONFIG_SCHEMA_DIR, "environments/environment-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/environments/environment-schema.json>`__
    for :py:mod:`~mlos_bench.environments`.
    """

    OPTIMIZER = path_join(CONFIG_SCHEMA_DIR, "optimizers/optimizer-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json>`__
    for :py:mod:`~mlos_bench.optimizers`.
    """

    SCHEDULER = path_join(CONFIG_SCHEMA_DIR, "schedulers/scheduler-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json>`__
    for :py:mod:`~mlos_bench.schedulers`.
    """

    SERVICE = path_join(CONFIG_SCHEMA_DIR, "services/service-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/services/service-schema.json>`__
    for :py:mod:`~mlos_bench.services`.
    """

    STORAGE = path_join(CONFIG_SCHEMA_DIR, "storage/storage-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/storage/storage-schema.json>`__
    for :py:mod:`~mlos_bench.storage` instances.
    """

    TUNABLE_PARAMS = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-params-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/tunables/tunable-params-schema.json>`__
    for :py:mod:`~mlos_bench.tunables` instances.
    """

    TUNABLE_VALUES = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-values-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/tunables/tunable-values-schema.json>`__
    for values of :py:mod:`~mlos_bench.tunables.tunable_groups.TunableGroups` instances.

    These can be used to specify the values of the tunables for a given experiment
    using the :py:class:`~mlos_bench.optimizers.one_shot_optimizer.OneShotOptimizer`
    for instance.
    """

    UNIFIED = path_join(CONFIG_SCHEMA_DIR, "mlos-bench-config-schema.json")
    """
    Combined global json `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/mlos-bench-config-schema.json>`__
    used to validate any ``mlos_bench`` config file (e.g., ``*.mlos.jsonc`` files).

    See Also
    --------
    <https://www.schemastore.org/json/>
    """

    @property
    def schema(self) -> dict:
        """Looks up the schema object for this type in the ``SCHEMA_STORE``."""
        loaded_schema = SCHEMA_STORE[self.value]
        assert loaded_schema
        return loaded_schema

    def validate(self, config: dict) -> None:
        """
        Checks the given config against the schema of this type.

        When the module level skip flag is set, only a warning is logged and no
        validation is performed.

        Parameters
        ----------
        config : dict
            The config to validate.

        Raises
        ------
        jsonschema.exceptions.ValidationError
            On validation failure.
        jsonschema.exceptions.SchemaError
            On schema loading error.
        """
        if _SKIP_VALIDATION:
            _LOG.warning("%s is set - skip schema validation", VALIDATION_ENV_FLAG)
            return
        # The registry lets the validator resolve references to the other
        # schemas held in the store.
        validator = jsonschema.Draft202012Validator(
            schema=self.schema,
            registry=SCHEMA_STORE.registry,
        )
        validator.validate(config)