Source code for nvflare.app_common.ccwf.swarm_server_ctl

# Copyright (c) 2023, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from nvflare.apis.fl_context import FLContext
from nvflare.app_common.ccwf.common import Constant
from nvflare.app_common.ccwf.server_ctl import ServerSideController
from nvflare.fuel.utils.validation_utils import DefaultValuePolicy, normalize_config_arg, validate_candidates


class SwarmServerController(ServerSideController):
    def __init__(
        self,
        num_rounds: int,
        start_round: int = 0,
        task_name_prefix=Constant.TN_PREFIX_SWARM,
        start_task_timeout=Constant.START_TASK_TIMEOUT,
        configure_task_timeout=Constant.CONFIG_TASK_TIMEOUT,
        task_check_period: float = Constant.TASK_CHECK_INTERVAL,
        job_status_check_interval: float = Constant.JOB_STATUS_CHECK_INTERVAL,
        participating_clients=None,
        result_clients=None,
        starting_client: str = "",
        max_status_report_interval: float = Constant.PER_CLIENT_STATUS_REPORT_TIMEOUT,
        progress_timeout: float = Constant.WORKFLOW_PROGRESS_TIMEOUT,
        private_p2p: bool = True,
        aggr_clients=None,
        train_clients=None,
    ):
        # a starting client is required for the swarm workflow: normalize the
        # config value and fail fast if it resolves to None
        result_clients = normalize_config_arg(result_clients)
        starting_client = normalize_config_arg(starting_client)
        if starting_client is None:
            raise ValueError("starting_client must be specified")

        super().__init__(
            num_rounds=num_rounds,
            start_round=start_round,
            task_name_prefix=task_name_prefix,
            start_task_timeout=start_task_timeout,
            configure_task_timeout=configure_task_timeout,
            task_check_period=task_check_period,
            job_status_check_interval=job_status_check_interval,
            participating_clients=participating_clients,
            result_clients=result_clients,
            result_clients_policy=DefaultValuePolicy.ALL,
            starting_client=starting_client,
            starting_client_policy=DefaultValuePolicy.ANY,
            max_status_report_interval=max_status_report_interval,
            progress_timeout=progress_timeout,
            private_p2p=private_p2p,
        )

        # default to empty lists here; actual membership is resolved against
        # participating_clients in start_controller
        if not train_clients:
            train_clients = []
        if not aggr_clients:
            aggr_clients = []

        self.aggr_clients = aggr_clients
        self.train_clients = train_clients
    def start_controller(self, fl_ctx: FLContext):
        super().start_controller(fl_ctx)

        self.train_clients = validate_candidates(
            var_name="train_clients",
            candidates=self.train_clients,
            base=self.participating_clients,
            default_policy=DefaultValuePolicy.ALL,
            allow_none=False,
        )

        self.aggr_clients = validate_candidates(
            var_name="aggr_clients",
            candidates=self.aggr_clients,
            base=self.participating_clients,
            default_policy=DefaultValuePolicy.ALL,
            allow_none=False,
        )

        # make sure every participating client is either a train client or an aggr client
        for c in self.participating_clients:
            if c not in self.train_clients and c not in self.aggr_clients:
                raise RuntimeError(f"Config Error: client {c} is neither train client nor aggr client")

        # set train_clients as a sticky prop in fl_ctx; in case a CSE (cross-site eval)
        # workflow follows, it will know that only training clients have local models
        fl_ctx.set_prop(key=Constant.PROP_KEY_TRAIN_CLIENTS, value=self.train_clients, private=True, sticky=True)
    def prepare_config(self):
        return {Constant.AGGR_CLIENTS: self.aggr_clients, Constant.TRAIN_CLIENTS: self.train_clients}
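
A minimal usage sketch (not part of the module above), assuming two hypothetical clients named "site-1" and "site-2". In practice the controller is configured as the server-side workflow of a swarm-learning job; only constructor arguments from the source above are used:

    # hypothetical client names; in a real job these come from the deployment
    controller = SwarmServerController(
        num_rounds=10,
        starting_client="site-1",
        aggr_clients=["site-1", "site-2"],   # clients that may act as aggregators
        train_clients=["site-1", "site-2"],  # clients that train local models
    )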