Source code for nvflare.app_common.np.recipes.lr.fedavg

# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional

from pydantic import BaseModel, PositiveInt, field_validator

from nvflare import FedJob
from nvflare.app_common.workflows.lr.fedavg import FedAvgLR
from nvflare.app_common.workflows.lr.np_persistor import LRModelPersistor
from nvflare.client.config import ExchangeFormat, TransferType
from nvflare.job_config.script_runner import FrameworkType, ScriptRunner
from nvflare.recipe.spec import Recipe
from nvflare.recipe.utils import validate_ckpt


# Internal — not part of the public API
class _FedAvgValidator(BaseModel):
    """Pydantic schema that checks the arguments given to ``FedAvgLrRecipe``.

    The fields mirror the recipe's keyword arguments one-to-one. Building an
    instance runs pydantic's type coercion/validation on every field and, for
    ``initial_ckpt``, the additional ``validate_ckpt`` check below.
    """

    # Job identity and round/size settings.
    name: str
    min_clients: PositiveInt
    num_rounds: int
    damping_factor: float
    num_features: PositiveInt

    # Optional checkpoint to start from; None means no checkpoint was given.
    initial_ckpt: Optional[str] = None

    # Client-side script execution settings.
    train_script: str
    train_args: str
    launch_external_process: bool = False
    command: str

    # Client-side memory management settings.
    client_memory_gc_rounds: int = 0
    cuda_empty_cache: bool = False

    @field_validator("initial_ckpt")
    @classmethod
    def validate_initial_ckpt(cls, ckpt):
        """Run ``validate_ckpt`` on a supplied checkpoint and return it unchanged."""
        # Nothing to check when the caller did not provide a checkpoint.
        if ckpt is None:
            return ckpt
        validate_ckpt(ckpt)
        return ckpt



[docs]
class FedAvgLrRecipe(Recipe):
    """A recipe for implementing Federated Averaging (FedAvg) for Logistic Regression with Newton Raphson.

    FedAvg is a fundamental federated learning algorithm that aggregates model updates
    from multiple clients by computing a weighted average based on the amount of local
    training data. This recipe sets up a complete federated learning workflow using
    the FedAvgLR controller specifically designed for logistic regression.

    The recipe configures:
    - A federated job with logistic regression model
    - FedAvgLR controller for Newton-Raphson based aggregation
    - Script runners for client-side training execution

    All arguments are keyword-only and are validated with ``_FedAvgValidator``
    before the job is assembled.

    Args:
        name: Name of the federated learning job. Defaults to "lr_fedavg".
        min_clients: Minimum number of clients required to start a training round.
            Must be a positive integer. It is also passed to the FedAvgLR
            controller as its ``num_clients``.
        num_rounds: Number of federated training rounds to execute. Defaults to 2.
        damping_factor: Damping factor passed to the FedAvgLR controller.
            Defaults to 0.8.
        num_features: Number of features for the logistic regression. Must be a
            positive integer. Defaults to 13.
        initial_ckpt: Absolute path to a pre-trained checkpoint file (.npy).
            The file may not exist locally as it could be on the server.
            Used to resume training from previously saved weights.
            Defaults to None.
        train_script: Path to the training script that will be executed on each client.
        train_args: Command line arguments to pass to the training script.
            Defaults to "".
        launch_external_process: Whether to launch the script in external process.
            Defaults to False.
        command: If launch_external_process=True, command to run script
            (prepended to script). Defaults to "python3 -u".
        client_memory_gc_rounds: Forwarded to the client ``ScriptRunner`` as
            ``memory_gc_rounds``. Presumably the number of rounds between
            client-side memory cleanups, with 0 disabling them (confirm against
            ``ScriptRunner``). Defaults to 0.
        cuda_empty_cache: Forwarded to the client ``ScriptRunner`` as
            ``cuda_empty_cache``. Presumably enables emptying the CUDA cache
            during cleanup (confirm against ``ScriptRunner``). Defaults to False.

    Raises:
        pydantic.ValidationError: If any argument fails validation.

    Example:
        ```python
            recipe = FedAvgLrRecipe(min_clients=2,
                            num_rounds=num_rounds,
                            damping_factor=0.8,
                            num_features=13,
                            train_script="client.py",
                            train_args=f"--data_root {data_root}")
        ```
    """

    def __init__(
        self,
        *,
        name: str = "lr_fedavg",
        min_clients: int,
        num_rounds: int = 2,
        damping_factor: float = 0.8,
        num_features: int = 13,
        initial_ckpt: Optional[str] = None,
        train_script: str,
        train_args: str = "",
        launch_external_process: bool = False,
        command: str = "python3 -u",
        client_memory_gc_rounds: int = 0,
        cuda_empty_cache: bool = False,
    ):
        # Validate inputs internally; everything below reads the validated
        # (and possibly coerced) values rather than the raw arguments.
        v = _FedAvgValidator(
            name=name,
            min_clients=min_clients,
            num_rounds=num_rounds,
            damping_factor=damping_factor,
            num_features=num_features,
            initial_ckpt=initial_ckpt,
            train_script=train_script,
            train_args=train_args,
            launch_external_process=launch_external_process,
            command=command,
            client_memory_gc_rounds=client_memory_gc_rounds,
            cuda_empty_cache=cuda_empty_cache,
        )

        self.name = v.name
        self.min_clients = v.min_clients
        self.num_rounds = v.num_rounds
        self.damping_factor = v.damping_factor
        self.initial_ckpt = v.initial_ckpt
        self.train_script = v.train_script
        self.train_args = v.train_args
        self.launch_external_process = v.launch_external_process
        self.command = v.command
        self.num_features = v.num_features
        self.client_memory_gc_rounds = v.client_memory_gc_rounds
        self.cuda_empty_cache = v.cuda_empty_cache

        # Create FedJob.
        job = FedJob(name=self.name, min_clients=self.min_clients)
        from nvflare.recipe.utils import prepare_initial_ckpt

        # The helper receives the job as well as the checkpoint; its return
        # value is what the persistor is told to load from.
        ckpt_path = prepare_initial_ckpt(self.initial_ckpt, job)
        persistor = LRModelPersistor(
            n_features=self.num_features,
            source_ckpt_file_full_name=ckpt_path,
        )
        persistor_id = job.to_server(persistor, id="lr_persistor")

        # Send custom controller to server
        controller = FedAvgLR(
            num_clients=self.min_clients,
            damping_factor=self.damping_factor,
            n_features=self.num_features,
            num_rounds=self.num_rounds,
            persistor_id=persistor_id,
        )
        job.to(controller, "server")

        # Client side: run the user's training script, exchanging full
        # parameters in RAW format.
        runner = ScriptRunner(
            script=self.train_script,
            script_args=self.train_args,
            launch_external_process=self.launch_external_process,
            command=self.command,
            framework=FrameworkType.RAW,
            server_expected_format=ExchangeFormat.RAW,
            params_transfer_type=TransferType.FULL,
            memory_gc_rounds=self.client_memory_gc_rounds,
            cuda_empty_cache=self.cuda_empty_cache,
        )

        job.to_clients(runner)
        super().__init__(job)