Source code for nvflare.private.fed.client.client_app_runner

# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os

from nvflare.apis.fl_constant import FLContextKey, SystemConfigs
from nvflare.apis.fl_context import FLContext
from nvflare.apis.workspace import Workspace
from nvflare.fuel.utils.config_service import ConfigService
from nvflare.private.admin_defs import Message
from nvflare.private.defs import CellChannel, EngineConstant, RequestHeader, TrainingTopic, new_cell_message
from nvflare.private.fed.app.fl_conf import create_privacy_manager
from nvflare.private.fed.client.client_json_config import ClientJsonConfigurator
from nvflare.private.fed.client.client_run_manager import ClientRunManager
from nvflare.private.fed.client.client_runner import ClientRunner
from nvflare.private.fed.client.client_status import ClientStatus
from nvflare.private.fed.client.command_agent import CommandAgent
from nvflare.private.fed.runner import Runner
from nvflare.private.fed.utils.fed_utils import authorize_build_component
from nvflare.private.privacy_manager import PrivacyService


[docs]class ClientAppRunner(Runner): logger = logging.getLogger("ClientAppRunner") def __init__(self, time_out=60.0) -> None: super().__init__() self.command_agent = None self.timeout = time_out self.client_runner = None
[docs] def start_run(self, app_root, args, config_folder, federated_client, secure_train, sp, event_handlers): self.client_runner = self.create_client_runner( app_root, args, config_folder, federated_client, secure_train, event_handlers ) federated_client.set_client_runner(self.client_runner) federated_client.set_primary_sp(sp) with self.client_runner.engine.new_context() as fl_ctx: self.start_command_agent(args, federated_client, fl_ctx) self.sync_up_parents_process(federated_client) federated_client.start_overseer_agent() notify_timeout = ConfigService.get_float_var( name="notify_timeout", conf=SystemConfigs.APPLICATION_CONF, default=5.0 ) self.notify_job_status(federated_client, args.job_id, ClientStatus.STARTED, timeout=notify_timeout) federated_client.status = ClientStatus.STARTED self.client_runner.run(app_root, args) self.notify_job_status(federated_client, args.job_id, ClientStatus.STOPPED, timeout=notify_timeout) federated_client.status = ClientStatus.STOPPED federated_client.stop_cell()
@staticmethod def _set_fl_context(fl_ctx: FLContext, app_root, args, workspace, secure_train): fl_ctx.set_prop(FLContextKey.CLIENT_NAME, args.client_name, private=False) fl_ctx.set_prop(EngineConstant.FL_TOKEN, args.token, private=False) fl_ctx.set_prop(FLContextKey.WORKSPACE_ROOT, args.workspace, private=True) fl_ctx.set_prop(FLContextKey.ARGS, args, sticky=True) fl_ctx.set_prop(FLContextKey.APP_ROOT, app_root, private=True, sticky=True) fl_ctx.set_prop(FLContextKey.WORKSPACE_OBJECT, workspace, private=True) fl_ctx.set_prop(FLContextKey.SECURE_MODE, secure_train, private=True, sticky=True) fl_ctx.set_prop(FLContextKey.CURRENT_RUN, args.job_id, private=False, sticky=True) fl_ctx.set_prop(FLContextKey.CURRENT_JOB_ID, args.job_id, private=False, sticky=True)
[docs] def create_client_runner(self, app_root, args, config_folder, federated_client, secure_train, event_handlers=None): workspace = Workspace(args.workspace, args.client_name, config_folder) fl_ctx = FLContext() self._set_fl_context(fl_ctx, app_root, args, workspace, secure_train) client_config_file_name = os.path.join(app_root, args.client_config) args.set.append(f"secure_train={secure_train}") conf = ClientJsonConfigurator( config_file_name=client_config_file_name, app_root=app_root, args=args, kv_list=args.set ) if event_handlers: conf.set_component_build_authorizer(authorize_build_component, fl_ctx=fl_ctx, event_handlers=event_handlers) conf.configure() runner_config = conf.runner_config # configure privacy control! privacy_manager = create_privacy_manager(workspace, names_only=False) if privacy_manager.is_policy_defined(): if privacy_manager.components: for cid, comp in privacy_manager.components.items(): runner_config.add_component(cid, comp) # initialize Privacy Service PrivacyService.initialize(privacy_manager) run_manager = self.create_run_manager(args, conf, federated_client, workspace) federated_client.run_manager = run_manager federated_client.handlers = conf.runner_config.handlers with run_manager.new_context() as fl_ctx: self._set_fl_context(fl_ctx, app_root, args, workspace, secure_train) client_runner = ClientRunner(config=conf.runner_config, job_id=args.job_id, engine=run_manager) run_manager.add_handler(client_runner) fl_ctx.set_prop(FLContextKey.RUNNER, client_runner, private=True) # self.start_command_agent(args, client_runner, federated_client, fl_ctx) return client_runner
[docs] def create_run_manager(self, args, conf, federated_client, workspace): return ClientRunManager( client_name=args.client_name, job_id=args.job_id, workspace=workspace, client=federated_client, components=conf.runner_config.components, handlers=conf.runner_config.handlers, conf=conf, )
[docs] def start_command_agent(self, args, federated_client, fl_ctx): # Start the command agent self.command_agent = CommandAgent(federated_client) self.command_agent.start(fl_ctx)
[docs] def sync_up_parents_process(self, federated_client): run_manager = federated_client.run_manager with run_manager.new_context() as fl_ctx: run_manager.get_all_clients_from_server(fl_ctx)
[docs] def notify_job_status(self, federated_client, job_id, status, timeout=5.0): message = Message(topic=TrainingTopic.NOTIFY_JOB_STATUS, body="") message.set_header(RequestHeader.JOB_ID, str(job_id)) message.set_header(RequestHeader.JOB_STATUS, status) federated_client.cell.send_request( target=federated_client.client_name, channel=CellChannel.CLIENT_MAIN, topic=message.topic, request=new_cell_message({}, message), timeout=timeout, )
[docs] def close(self): if self.command_agent: self.command_agent.shutdown()
[docs] def stop(self): if self.client_runner: self.client_runner.abort()