Coverage for mlos_bench/mlos_bench/environments/remote/remote_env.py: 88%
68 statements
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-14 01:58 +0000
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-14 01:58 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""
6Remotely executed benchmark/script environment.
8e.g. Application Environment
10TODO: Document how variable propagation works in the remote environments.
11"""
13import logging
14import re
15from datetime import datetime
16from typing import Dict, Iterable, Optional, Tuple
18from pytz import UTC
20from mlos_bench.environments.script_env import ScriptEnv
21from mlos_bench.environments.status import Status
22from mlos_bench.services.base_service import Service
23from mlos_bench.services.types.host_ops_type import SupportsHostOps
24from mlos_bench.services.types.remote_exec_type import SupportsRemoteExec
25from mlos_bench.tunables.tunable import TunableValue
26from mlos_bench.tunables.tunable_groups import TunableGroups
# Module-level logger, named after this module via `__name__`.
28_LOG = logging.getLogger(__name__)
class RemoteEnv(ScriptEnv):
    """
    Environment to run benchmarks and scripts on a remote host OS.

    e.g. Application Environment

    The scripts are submitted through the environment's service, which must
    implement `SupportsRemoteExec`. If the "wait_boot" config option is set,
    the service must also implement `SupportsHostOps`.
    """

    # Matches runs of non-word characters; used to sanitize the environment
    # name when building the remote command name prefix.
    _RE_SPECIAL = re.compile(r"\W+")

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        name: str,
        config: dict,
        global_config: Optional[dict] = None,
        tunables: Optional[TunableGroups] = None,
        service: Optional[Service] = None,
    ):
        """
        Create a new environment for remote execution.

        Parameters
        ----------
        name: str
            Human-readable name of the environment.
        config : dict
            Free-format dictionary that contains the benchmark environment
            configuration. Each config must have at least the "tunable_params"
            and the "const_args" sections.
            `RemoteEnv` must also have at least some of the following parameters:
            {setup, run, teardown, wait_boot}
        global_config : dict
            Free-format dictionary of global parameters (e.g., security credentials)
            to be mixed into the "const_args" section of the local config.
        tunables : TunableGroups
            A collection of tunable parameters for *all* environments.
        service: Service
            An optional service object (e.g., providing methods to
            deploy or reboot a Host, VM, OS, etc.).

        Raises
        ------
        AssertionError
            If the service does not support remote execution, or if "wait_boot"
            is enabled and the service does not support host operations.
        """
        super().__init__(
            name=name,
            config=config,
            global_config=global_config,
            tunables=tunables,
            service=service,
        )

        self._wait_boot = self.config.get("wait_boot", False)
        # Prefix for remote command names: "mlos-<sanitized-lowercase-env-name>-".
        self._command_prefix = "mlos-" + self._RE_SPECIAL.sub("-", self.name).lower() + "-"

        # NOTE: the asserts below also narrow the static type of the service.
        assert self._service is not None and isinstance(
            self._service, SupportsRemoteExec
        ), "RemoteEnv requires a service that supports remote execution operations"
        self._remote_exec_service: SupportsRemoteExec = self._service

        # The host operations service is only required (and only set) when
        # we have to wait for the remote host to boot.
        if self._wait_boot:
            assert self._service is not None and isinstance(
                self._service, SupportsHostOps
            ), "RemoteEnv requires a service that supports host operations"
            self._host_service: SupportsHostOps = self._service

    def setup(self, tunables: TunableGroups, global_config: Optional[dict] = None) -> bool:
        """
        Check if the environment is ready and set up the application and benchmarks on a
        remote host.

        Parameters
        ----------
        tunables : TunableGroups
            A collection of tunable OS and application parameters along with their
            values. Setting these parameters should not require an OS reboot.
        global_config : dict
            Free-format dictionary of global parameters of the environment
            that are not used in the optimization process.

        Returns
        -------
        is_success : bool
            True if operation is successful, false otherwise.
        """
        if not super().setup(tunables, global_config):
            return False

        if self._wait_boot:
            _LOG.info("Wait for the remote environment to start: %s", self)
            (status, params) = self._host_service.start_host(self._params)
            # Block on the host operation only if it has not finished yet.
            if status.is_pending():
                (status, _) = self._host_service.wait_host_operation(params)
            if not status.is_succeeded():
                return False

        if self._script_setup:
            _LOG.info("Set up the remote environment: %s", self)
            (status, _timestamp, _output) = self._remote_exec("setup", self._script_setup)
            _LOG.info("Remote set up complete: %s :: %s", self, status)
            self._is_ready = status.is_succeeded()
        else:
            # No setup script to run: the environment is ready as is.
            self._is_ready = True

        return self._is_ready

    def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]:
        """
        Runs the run script on the remote environment.

        This can be used to, for instance, submit a new experiment to the
        remote application environment by (re)configuring an application and
        launching the benchmark, or run a script that collects the results.

        Returns
        -------
        (status, timestamp, output) : (Status, datetime.datetime, dict)
            3-tuple of (Status, timestamp, output) values, where `output` is a dict
            with the results or None if the status is not COMPLETED.
            If run script is a benchmark, then the score is usually expected to
            be in the `score` field.
        """
        _LOG.info("Run script remotely on: %s", self)
        (status, timestamp, _) = result = super().run()
        # Return the parent's result as is if it is not ready
        # or if there is no run script to execute.
        if not (status.is_ready() and self._script_run):
            return result

        (status, timestamp, output) = self._remote_exec("run", self._script_run)
        if status.is_succeeded() and output is not None:
            # Parse the results from the standard output of the remote script.
            output = self._extract_stdout_results(output.get("stdout", ""))
        _LOG.info("Remote run complete: %s :: %s = %s", self, status, output)
        return (status, timestamp, output)

    def teardown(self) -> None:
        """
        Clean up and shut down the remote environment.

        Runs the teardown script (if any) on the remote host and then invokes
        the parent's teardown. The parent's teardown is invoked even if
        submitting the remote teardown script raises an exception; the
        exception is still propagated to the caller.
        """
        try:
            if self._script_teardown:
                _LOG.info("Remote teardown: %s", self)
                (status, _timestamp, _output) = self._remote_exec(
                    "teardown",
                    self._script_teardown,
                )
                _LOG.info("Remote teardown complete: %s :: %s", self, status)
        finally:
            # Always let the parent clean up its state, even on remote failure.
            super().teardown()

    def _remote_exec(
        self,
        command_name: str,
        script: Iterable[str],
    ) -> Tuple[Status, datetime, Optional[dict]]:
        """
        Run a script on the remote host.

        Parameters
        ----------
        command_name : str
            Name of the command to be executed on the remote host.
            The environment-specific command prefix is prepended to it.
        script : [str]
            List of commands to be executed on the remote host.

        Returns
        -------
        result : (Status, datetime.datetime, dict)
            3-tuple of Status, timestamp, and dict with the benchmark/script results.
            Status is one of {PENDING, SUCCEEDED, FAILED, TIMED_OUT}
        """
        env_params = self._get_env_params()
        command_name = self._command_prefix + command_name
        _LOG.debug("Submit command: %s with %s", command_name, env_params)
        (status, output) = self._remote_exec_service.remote_exec(
            script,
            config={
                **self._params,
                "commandName": command_name,
            },
            env_params=env_params,
        )
        _LOG.debug("Script submitted: %s %s :: %s", self, status, output)
        # Fetch the results only if the submission has not failed.
        if status in {Status.PENDING, Status.SUCCEEDED}:
            (status, output) = self._remote_exec_service.get_remote_exec_results(output)
        _LOG.debug("Status: %s :: %s", status, output)
        # FIXME: get the timestamp from the remote environment!
        timestamp = datetime.now(tz=UTC)
        return (status, timestamp, output)
206 return (status, timestamp, output)