Source code for nvflare.app_common.ccwf.common

# Copyright (c) 2023, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Union

from nvflare.app_common.abstract.metric_comparator import MetricComparator


[docs]class Constant:

    TN_PREFIX_CYCLIC = "cyclic"
    TN_PREFIX_SWARM = "swarm"
    TN_PREFIX_CROSS_SITE_EVAL = "cse"

    BASENAME_CONFIG = "config"
    BASENAME_START = "start"
    BASENAME_LEARN = "learn"
    BASENAME_EVAL = "eval"
    BASENAME_REPORT_LEARN_RESULT = "report_learn_result"
    BASENAME_REPORT_FINAL_RESULT = "report_final_result"
    BASENAME_ASK_FOR_MODEL = "ask_for_model"

    TASK_NAME_PREFIX = "cwf.task_prefix"
    PRIVATE_P2P = "cwf.private_p2p"
    ERROR = "cwf.error"
    ORDER = "cwf.order"
    CLIENTS = "cwf.clients"
    START_CLIENT = "cwf.start_client"
    RESULT_CLIENTS = "cwf.result_clients"
    CLIENT_ORDER = "cwf.client_order"
    LAST_ROUND = "cwf.last_round"
    START_ROUND = "cwf.start_round"
    TIMESTAMP = "cwf.timestamp"
    ACTION = "cwf.action"
    ALL_DONE = "cwf.all_done"
    AGGR_CLIENTS = "cwf.aggr_clients"
    TRAIN_CLIENTS = "cwf.train_clients"
    AGGREGATOR = "cwf.aggr"
    METRIC = "cwf.metric"
    CLIENT = "cwf.client"
    ROUND = "cwf.round"
    CONFIG = "cwf.config"
    STATUS_REPORTS = "cwf.status_reports"
    RESULT = "cwf.result"
    RESULT_TYPE = "cwf.result_type"
    EVAL_LOCAL = "cwf.eval_local"
    EVAL_GLOBAL = "cwf.eval_global"
    EVALUATORS = "cwf.evaluators"
    EVALUATEES = "cwf.evaluatees"
    GLOBAL_CLIENT = "cwf.global_client"
    MODEL_OWNER = "cwf.model_owner"
    MODEL_NAME = "cwf.model_name"
    MODEL_TYPE = "cwf.model_type"
    GLOBAL_NAMES = "cwf.global_names"
    EXECUTOR = "cwf.executor"
    EXECUTOR_INITIALIZED = "cwf.executor_initialized"
    EXECUTOR_FINALIZED = "cwf.executor_finalized"

    TOPIC_SHARE_RESULT = "cwf.share_result"
    TOPIC_END_WORKFLOW = "cwf.end_wf"

    RC_NO_GLOBAL_MODELS = "cwf.no_global_models"
    RC_NO_LOCAL_MODEL = "cwf.no_local_model"
    RC_UNABLE_TO_EVAL = "cwf.unable_to_eval"

    CONFIG_TASK_TIMEOUT = 300
    START_TASK_TIMEOUT = 10
    END_WORKFLOW_TIMEOUT = 2.0
    TASK_CHECK_INTERVAL = 0.5
    JOB_STATUS_CHECK_INTERVAL = 2.0
    PER_CLIENT_STATUS_REPORT_TIMEOUT = 90.0
    WORKFLOW_PROGRESS_TIMEOUT = 3600.0

    LEARN_TASK_CHECK_INTERVAL = 1.0
    LEARN_TASK_ACK_TIMEOUT = 10
    LEARN_TASK_ABORT_TIMEOUT = 5.0
    FINAL_RESULT_ACK_TIMEOUT = 10
    GET_MODEL_TIMEOUT = 10
    MAX_TASK_TIMEOUT = 3600

    PROP_KEY_TRAIN_CLIENTS = "cwf.train_clients"


[docs]class ModelType:

    LOCAL = "local"
    GLOBAL = "global"


[docs]class ResultType:

    BEST = "best"
    LAST = "last"


[docs]class CyclicOrder:

    FIXED = "fixed"
    RANDOM = "random"


[docs]class StatusReport:
    def __init__(
        self,
        timestamp=None,
        action: str = "",
        last_round=None,
        all_done=False,
        error: str = "",
    ):
        self.timestamp = timestamp
        self.action = action
        self.last_round = last_round
        self.all_done = all_done
        self.error = error

[docs]    def to_dict(self) -> dict:
        result = {
            Constant.TIMESTAMP: self.timestamp,
            Constant.ACTION: self.action,
            Constant.ALL_DONE: self.all_done,
        }

        if self.last_round is not None:
            result[Constant.LAST_ROUND] = self.last_round

        if self.error:
            result[Constant.ERROR] = self.error
        return result

    def __eq__(self, other):
        if not isinstance(other, StatusReport):
            # don't attempt to compare against unrelated types
            return ValueError(f"cannot compare to object of type {type(other)}")

        return (
            self.last_round == other.last_round
            and self.timestamp == other.timestamp
            and self.action == other.action
            and self.all_done == other.all_done
            and self.error == other.error
        )


[docs]def status_report_from_dict(d: dict) -> StatusReport:
    last_round = d.get(Constant.LAST_ROUND)
    timestamp = d.get(Constant.TIMESTAMP)
    all_done = d.get(Constant.ALL_DONE)
    error = d.get(Constant.ERROR)
    action = d.get(Constant.ACTION)

    return StatusReport(
        last_round=last_round,
        timestamp=timestamp,
        action=action,
        all_done=all_done,
        error=error,
    )


[docs]def rotate_to_front(item, items: list):
    num_items = len(items)
    idx = items.index(item)
    if idx != 0:
        new_list = [None] * num_items
        for i in range(num_items):
            new_pos = i - idx
            if new_pos < 0:
                new_pos += num_items
            new_list[new_pos] = items[i]

        for i in range(num_items):
            items[i] = new_list[i]


[docs]def topic_for_end_workflow(wf_id):
    return f"{Constant.TOPIC_END_WORKFLOW}.{wf_id}"


[docs]def make_task_name(prefix: str, base_name: str) -> str:
    return f"{prefix}_{base_name}"


[docs]class NumberMetricComparator(MetricComparator):
[docs]    def compare(self, a, b) -> Union[int, float]:
        if not isinstance(a, (int, float)):
            raise ValueError(f"metric value must be a number but got {type(a)}")

        if not isinstance(b, (int, float)):
            raise ValueError(f"metric value must be a number but got {type(b)}")

        return a - b