Source code for nvflare.private.fed.app.client.client_train

# Copyright (c) 2021-2022, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Federated client launching script."""

import argparse
import os
import sys
import time

from nvflare.apis.event_type import EventType
from nvflare.apis.fl_constant import WorkspaceConstants
from nvflare.fuel.common.excepts import ConfigError
from nvflare.fuel.sec.audit import AuditService
from nvflare.fuel.sec.security_content_service import SecurityContentService
from nvflare.fuel.utils.argument_utils import parse_vars
from nvflare.private.defs import AppFolderConstants, SSLConstants
from nvflare.private.fed.app.fl_conf import FLClientStarterConfiger
from nvflare.private.fed.client.admin import FedAdminAgent
from nvflare.private.fed.client.admin_msg_sender import AdminMessageSender
from nvflare.private.fed.client.client_engine import ClientEngine
from nvflare.private.fed.client.fed_client import FederatedClient
from nvflare.private.fed.utils.fed_utils import add_logfile_handler, secure_content_check


def _parse_client_args():
    """Parse the command line of the client launcher.

    Returns:
        The argparse namespace holding ``workspace``, ``fed_client``, ``set`` and ``local_rank``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--workspace", "-m", type=str, help="WORKSPACE folder", required=True)
    parser.add_argument(
        "--fed_client", "-s", type=str, help="an aggregation server specification json file", required=True
    )
    parser.add_argument("--set", metavar="KEY=VALUE", nargs="*")
    parser.add_argument("--local_rank", type=int, default=0)
    return parser.parse_args()


def _remove_restart_and_shutdown_files(workspace):
    """Delete the restart/shutdown files from the workspace, exiting with -1 if one cannot be removed."""
    for name in [WorkspaceConstants.RESTART_FILE, WorkspaceConstants.SHUTDOWN_FILE]:
        path = os.path.join(workspace, name)
        try:
            if os.path.exists(path):
                os.remove(path)
        except OSError:
            # Only file-system failures are handled here; KeyboardInterrupt/SystemExit propagate untouched.
            print("Could not remove the {} file.  Please check your system before starting FL.".format(name))
            sys.exit(-1)


def _first_server_by_name(servers):
    """Return the ``{name: service}`` entry whose name sorts first.

    Dicts do not define an ordering in Python 3, so sorting the entries directly raises
    TypeError as soon as there are two of them; the entries are ordered by their keys instead.
    An empty list still raises IndexError.
    """
    return sorted(servers, key=lambda entry: sorted(entry))[0]


def main():
    """Start the federated client process.

    Steps performed:
        1. Reject unsupported Python versions (only 3.7 and 3.8 pass the checks).
        2. Parse the command line and derive the config file locations stored on ``args``.
        3. Remove the restart/shutdown files found in the workspace.
        4. Change into the workspace, configure the client, run the security check,
           create the federated client, wait for the SP, register (rank 0 only),
           start the heartbeat and start the admin agent.

    Raises:
        RuntimeError: if the Python version is unsupported or the client has no token
            after registration.

    The function always finishes through ``sys.exit``: -1 when a restart/shutdown file
    cannot be removed, 0 otherwise (including after a reported ConfigError).
    """
    if sys.version_info >= (3, 9):
        raise RuntimeError("Python versions 3.9 and above are not yet supported. Please use Python 3.8 or 3.7.")
    if sys.version_info < (3, 7):
        raise RuntimeError("Python versions 3.6 and below are not supported. Please use Python 3.8 or 3.7.")

    args = _parse_client_args()
    kv_list = parse_vars(args.set)

    config_folder = kv_list.get("config_folder", "")
    if config_folder == "":
        args.client_config = AppFolderConstants.CONFIG_FED_CLIENT
    else:
        args.client_config = os.path.join(config_folder, AppFolderConstants.CONFIG_FED_CLIENT)

    # TODO:: remove env and train config since they are not core
    args.env = os.path.join("config", AppFolderConstants.CONFIG_ENV)
    args.train_config = os.path.join("config", AppFolderConstants.CONFIG_TRAIN)
    args.log_config = None

    _remove_restart_and_shutdown_files(args.workspace)

    rank = args.local_rank

    try:
        # NOTE(review): after this chdir, a relative --workspace makes the joins below resolve
        # against the new working directory - confirm callers always pass an absolute path.
        os.chdir(args.workspace)
        AuditService.initialize(audit_file_name=WorkspaceConstants.AUDIT_LOG)

        startup = os.path.join(args.workspace, "startup")
        conf = FLClientStarterConfiger(
            app_root=startup,
            client_config_file_name=args.fed_client,
            log_config_file_name=WorkspaceConstants.LOGGING_CONFIG,
            kv_list=args.set,
        )
        conf.configure()

        log_file = os.path.join(args.workspace, "log.txt")
        add_logfile_handler(log_file)

        deployer = conf.base_deployer
        security_check(secure_train=deployer.secure_train, content_folder=startup, fed_client_config=args.fed_client)

        federated_client = deployer.create_fed_client(args)

        # Poll once per second until the client reports that the SP is established.
        while not federated_client.sp_established:
            print("Waiting for SP....")
            time.sleep(1.0)

        federated_client.use_gpu = False
        federated_client.config_folder = config_folder

        # Only the rank 0 process registers with the server.
        if rank == 0:
            federated_client.register()

        if not federated_client.token:
            print("The client could not register to server. ")
            raise RuntimeError("Login failed.")

        federated_client.start_heartbeat()

        servers = [{t["name"]: t["service"]} for t in deployer.server_config]
        admin_agent = create_admin_agent(
            deployer.client_config,
            deployer.client_name,
            deployer.req_processors,
            deployer.secure_train,
            _first_server_by_name(servers),
            federated_client,
            args,
            deployer.multi_gpu,
            rank,
        )
        admin_agent.start()

        deployer.close()

    except ConfigError as ex:
        # The configuration problem is reported; the process still exits with status 0 below.
        print("ConfigError:", str(ex))

    sys.exit(0)
def security_check(secure_train: bool, content_folder: str, fed_client_config: str):
    """Initialize the security services and, in secure mode, validate the client content.

    When ``secure_train`` is true and ``secure_content_check`` reports any item, the items
    are printed and the process exits with status 1.

    Args:
        secure_train (bool): whether the client runs in secure mode.
        content_folder (str): folder passed to the SecurityContentService.
        fed_client_config (str): fed_client.json
    """
    # The SecurityContentService has to be initialized before any other service,
    # because those services may need it.
    SecurityContentService.initialize(content_folder=content_folder)

    if secure_train:
        rejected = secure_content_check(fed_client_config, site_type="client")
        if len(rejected) > 0:
            print("The following files are not secure content.")
            # One reported item per output line.
            print(*rejected, sep="\n")
            sys.exit(1)

    # The AuditService is used by command processing and may be used elsewhere as well.
    AuditService.initialize(audit_file_name=WorkspaceConstants.AUDIT_LOG)
def create_admin_agent(
    client_args,
    client_id,
    req_processors,
    secure_train,
    server_args,
    federated_client: FederatedClient,
    args,
    is_multi_gpu,
    rank,
):
    """Build the admin agent for this client and wire it to a new client engine.

    Args:
        client_args: start client command args; the SSL root cert, cert and private key are read from it
        client_id: client name (not referenced in this function)
        req_processors: request processors to register on the agent
        secure_train: True/False
        server_args: FL server args
        federated_client: FL client object; its token names the message sender and the engine
        args: command args
        is_multi_gpu: True/False
        rank: client rank process number

    Returns:
        A FedAdminAgent.
    """
    sender_options = {
        "client_name": federated_client.token,
        "root_cert": client_args[SSLConstants.ROOT_CERT],
        "ssl_cert": client_args[SSLConstants.CERT],
        "private_key": client_args[SSLConstants.PRIVATE_KEY],
        "server_args": server_args,
        "secure": secure_train,
        "is_multi_gpu": is_multi_gpu,
        "rank": rank,
    }
    message_sender = AdminMessageSender(**sender_options)

    engine = ClientEngine(federated_client, federated_client.token, message_sender, args, rank)
    agent = FedAdminAgent(client_name="admin_agent", sender=message_sender, app_ctx=engine)

    # Cross-link the agent, its application context and the federated client.
    agent.app_ctx.set_agent(agent)
    federated_client.set_client_engine(engine)

    for request_processor in req_processors:
        agent.register_processor(request_processor)

    # The SYSTEM_START event is fired once everything above is wired up.
    start_context = engine.new_context()
    engine.fire_event(EventType.SYSTEM_START, start_context)

    return agent
if __name__ == "__main__":
    # This is the main program when starting the NVIDIA FLARE client process.
    #
    # For MacOS, it needs to use 'spawn' for creating multi-process (currently disabled):
    #     if os.name == 'posix':
    #         import multiprocessing
    #         multiprocessing.set_start_method('spawn')
    main()