Coverage for mlos_bench/mlos_bench/config/schemas/config_schemas.py: 92%

72 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-10-07 01:52 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5"""A simple class for describing where to find different config schemas and validating 

6configs against them. 

7""" 

8 

9import json # schema files are pure json - no comments 

10import logging 

11from enum import Enum 

12from os import environ, path, walk 

13from typing import Dict, Iterator, Mapping 

14 

15import jsonschema 

16from referencing import Registry, Resource 

17from referencing.jsonschema import DRAFT202012 

18 

19from mlos_bench.util import path_join 

20 

21_LOG = logging.getLogger(__name__) 

22 

23# The path to find all config schemas. 

24CONFIG_SCHEMA_DIR = path_join(path.dirname(__file__), abs_path=True) 

25 

# Schema validation can be switched off through an environment variable to
# shorten tight dev cycles. The resulting flag is consulted by the
# `ConfigSchema.validate()` method below.
# NOTE: skipping validation may cause pytest to fail if it's expecting
# exceptions to be raised for invalid configs.
_VALIDATION_ENV_FLAG = "MLOS_BENCH_SKIP_SCHEMA_VALIDATION"
# The variable is compared case-insensitively; an unset variable counts as "false".
_SKIP_VALIDATION = environ.get(_VALIDATION_ENV_FLAG, "false").lower() in (
    "1",
    "on",
    "true",
    "y",
    "yes",
)

38 

39 

40# Note: we separate out the SchemaStore from a class method on ConfigSchema 

41# because of issues with mypy/pylint and non-Enum-member class members. 

class SchemaStore(Mapping):
    """A lazily populated, read-only mapping of schemas and subschemas for the
    validator to reference.

    Every non-empty ``*.json`` file found under ``CONFIG_SCHEMA_DIR`` is stored
    under two keys: its file path and its ``$id``. The data lives in class
    attributes, so all instances share the same store and registry.
    """

    # A class member mapping of schema id (and schema file path) to schema object.
    _SCHEMA_STORE: Dict[str, dict] = {}
    # The same schemas as a `referencing.Registry`; built on first use by `registry`.
    _REGISTRY: Registry = Registry()

    def __len__(self) -> int:
        """Returns the number of keys in the store, loading the schemas if needed."""
        # Load lazily here as well, so len()/keys()/items() agree with
        # __getitem__ even when they are the first access to the store.
        self._load_schemas()
        return len(self._SCHEMA_STORE)

    def __iter__(self) -> Iterator:
        """Iterates over the keys of the store, loading the schemas if needed."""
        self._load_schemas()
        return iter(self._SCHEMA_STORE)

    def __getitem__(self, key: str) -> dict:
        """Gets the schema object for the given key (a schema file path or ``$id``).

        Raises
        ------
        KeyError
            If no schema is stored under the given key.
        """
        self._load_schemas()
        return self._SCHEMA_STORE[key]

    @classmethod
    def _load_schemas(cls) -> None:
        """Loads all schemas and subschemas into the schema store for the validator to
        reference.

        Does nothing when the store is already populated. The schemas are first
        collected in a local dict and published only once every file has been
        read successfully, so a failure part-way through never leaves a
        partially filled store behind (which would block any later reload).
        """
        if cls._SCHEMA_STORE:
            return
        loaded: Dict[str, dict] = {}
        for root, _, files in walk(CONFIG_SCHEMA_DIR):
            for file_name in files:
                if not file_name.endswith(".json"):
                    continue
                file_path = path_join(root, file_name)
                # Empty files are not valid JSON documents - skip them.
                if path.getsize(file_path) == 0:
                    continue
                with open(file_path, mode="r", encoding="utf-8") as schema_file:
                    schema = json.load(schema_file)
                loaded[file_path] = schema
                # Let the schema be referenced by its id as well.
                assert "$id" in schema, f"Schema file has no '$id': {file_path}"
                assert schema["$id"] not in loaded, f"Duplicate schema key: {schema['$id']}"
                loaded[schema["$id"]] = schema
        # Update in place: the dict object is shared by the class and its instances.
        cls._SCHEMA_STORE.update(loaded)

    @classmethod
    def _load_registry(cls) -> None:
        """Also store them in a Registry object for referencing by recent versions of
        jsonschema.

        Each key of the schema store (file path or ``$id``) becomes the URL of
        a resource in the registry.
        """
        if not cls._SCHEMA_STORE:
            cls._load_schemas()
        cls._REGISTRY = Registry().with_resources(
            [
                (url, Resource.from_contents(schema, default_specification=DRAFT202012))
                for url, schema in cls._SCHEMA_STORE.items()
            ]
        )

    @property
    def registry(self) -> Registry:
        """Returns a Registry object with all the schemas loaded."""
        # The class-level default is an empty Registry; build the real one on first use.
        if not self._REGISTRY:
            self._load_registry()
        return self._REGISTRY

105 

106 

107SCHEMA_STORE = SchemaStore() 

108 

109 

class ConfigSchema(Enum):
    """Enumerates the known config schema types.

    The value of each member is the path of its schema file under
    ``CONFIG_SCHEMA_DIR``; members can look up their schema object and validate
    configs against it.
    """

    CLI = path_join(CONFIG_SCHEMA_DIR, "cli/cli-schema.json")
    GLOBALS = path_join(CONFIG_SCHEMA_DIR, "cli/globals-schema.json")
    ENVIRONMENT = path_join(CONFIG_SCHEMA_DIR, "environments/environment-schema.json")
    OPTIMIZER = path_join(CONFIG_SCHEMA_DIR, "optimizers/optimizer-schema.json")
    SCHEDULER = path_join(CONFIG_SCHEMA_DIR, "schedulers/scheduler-schema.json")
    SERVICE = path_join(CONFIG_SCHEMA_DIR, "services/service-schema.json")
    STORAGE = path_join(CONFIG_SCHEMA_DIR, "storage/storage-schema.json")
    TUNABLE_PARAMS = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-params-schema.json")
    TUNABLE_VALUES = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-values-schema.json")

    UNIFIED = path_join(CONFIG_SCHEMA_DIR, "mlos-bench-config-schema.json")

    @property
    def schema(self) -> dict:
        """Looks up the schema object of this type in the shared schema store."""
        schema_obj = SCHEMA_STORE[self.value]
        assert schema_obj
        return schema_obj

    def validate(self, config: dict) -> None:
        """
        Checks the given config against the schema of this type.

        When the skip-validation environment flag is set, only a warning is
        logged and no check is performed.

        Parameters
        ----------
        config : dict
            The config to validate.

        Raises
        ------
        jsonschema.exceptions.ValidationError
        jsonschema.exceptions.SchemaError
        """
        if _SKIP_VALIDATION:
            _LOG.warning("%s is set - skip schema validation", _VALIDATION_ENV_FLAG)
            return
        # The registry lets the validator resolve references to the other loaded schemas.
        validator = jsonschema.Draft202012Validator(
            schema=self.schema,
            registry=SCHEMA_STORE.registry,
        )
        validator.validate(config)