# Source code for nvflare.app_common.np.recipes.lr.fedavg

# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional

from pydantic import BaseModel, PositiveInt, field_validator

from nvflare import FedJob
from nvflare.app_common.workflows.lr.fedavg import FedAvgLR
from nvflare.app_common.workflows.lr.np_persistor import LRModelPersistor
from nvflare.client.config import ExchangeFormat, TransferType
from nvflare.job_config.script_runner import FrameworkType, ScriptRunner
from nvflare.recipe.spec import Recipe
from nvflare.recipe.utils import validate_ckpt


# Internal — not part of the public API
class _FedAvgValidator(BaseModel):
    """Pydantic schema used to check the arguments given to ``FedAvgLrRecipe``.

    Every field corresponds to one keyword argument of the recipe constructor;
    building an instance runs pydantic's type checks plus the checkpoint check
    defined below.
    """

    # Job identity and federation size.
    name: str
    min_clients: PositiveInt
    num_rounds: int

    # Logistic-regression settings.
    damping_factor: float
    num_features: PositiveInt
    initial_ckpt: Optional[str] = None

    # Client-side training script and how it is launched.
    train_script: str
    train_args: str
    launch_external_process: bool = False
    command: str

    # Client memory-management options.
    client_memory_gc_rounds: int = 0
    cuda_empty_cache: bool = False

    @field_validator("initial_ckpt")
    @classmethod
    def validate_initial_ckpt(cls, v):
        """Run ``validate_ckpt`` on the checkpoint path when one is supplied."""
        # Nothing to check when no checkpoint was given.
        if v is None:
            return v
        validate_ckpt(v)
        return v


class FedAvgLrRecipe(Recipe):
    """A recipe for implementing Federated Averaging (FedAvg) for Logistic Regression with Newton Raphson.

    FedAvg is a fundamental federated learning algorithm that aggregates model updates from
    multiple clients by computing a weighted average based on the amount of local training data.
    This recipe sets up a complete federated learning workflow using the FedAvgLR controller
    specifically designed for logistic regression.

    The recipe configures:
    - A federated job with logistic regression model
    - FedAvgLR controller for Newton-Raphson based aggregation
    - Script runners for client-side training execution

    Args:
        name: Name of the federated learning job. Defaults to "lr_fedavg".
        min_clients: Minimum number of clients required to start a training round. Must be a
            positive integer. The same value is passed to the FedAvgLR controller as
            ``num_clients``.
        num_rounds: Number of federated training rounds to execute. Defaults to 2.
        damping_factor: Damping factor forwarded to the FedAvgLR controller. Defaults to 0.8.
        num_features: Number of features for the logistic regression. Must be a positive
            integer. Defaults to 13.
        initial_ckpt: Absolute path to a pre-trained checkpoint file (.npy). The file may not
            exist locally as it could be on the server. Used to resume training from previously
            saved weights. Defaults to None.
        train_script: Path to the training script that will be executed on each client.
        train_args: Command line arguments to pass to the training script. Defaults to "".
        launch_external_process: Whether to launch the script in external process.
            Defaults to False.
        command: If launch_external_process=True, command to run script (prepended to script).
            Defaults to "python3 -u".
        client_memory_gc_rounds: Forwarded to ``ScriptRunner`` as ``memory_gc_rounds``.
            Defaults to 0.
        cuda_empty_cache: Forwarded to ``ScriptRunner`` as ``cuda_empty_cache``.
            Defaults to False.

    Raises:
        pydantic.ValidationError: If the arguments fail the ``_FedAvgValidator`` checks
            (for example a non-positive ``min_clients`` or ``num_features``).

    Example:
        ```python
        recipe = FedAvgLrRecipe(min_clients=2,
                                num_rounds=num_rounds,
                                damping_factor=0.8,
                                num_features=13,
                                train_script="client.py",
                                train_args=f"--data_root {data_root}")
        ```
    """

    def __init__(
        self,
        *,
        name: str = "lr_fedavg",
        min_clients: int,
        num_rounds: int = 2,
        damping_factor: float = 0.8,
        num_features: int = 13,
        initial_ckpt: Optional[str] = None,
        train_script: str,
        train_args: str = "",
        launch_external_process: bool = False,
        command: str = "python3 -u",
        client_memory_gc_rounds: int = 0,
        cuda_empty_cache: bool = False,
    ):
        # Validate inputs internally
        v = _FedAvgValidator(
            name=name,
            min_clients=min_clients,
            num_rounds=num_rounds,
            damping_factor=damping_factor,
            num_features=num_features,
            initial_ckpt=initial_ckpt,
            train_script=train_script,
            train_args=train_args,
            launch_external_process=launch_external_process,
            command=command,
            client_memory_gc_rounds=client_memory_gc_rounds,
            cuda_empty_cache=cuda_empty_cache,
        )

        # Keep the validated (and possibly coerced) values rather than the raw arguments.
        self.name = v.name
        self.min_clients = v.min_clients
        self.num_rounds = v.num_rounds
        self.damping_factor = v.damping_factor
        self.initial_ckpt = v.initial_ckpt
        self.train_script = v.train_script
        self.train_args = v.train_args
        self.launch_external_process = v.launch_external_process
        self.command = v.command
        self.num_features = v.num_features
        self.client_memory_gc_rounds = v.client_memory_gc_rounds
        self.cuda_empty_cache = v.cuda_empty_cache

        # Create FedJob.
        job = FedJob(name=self.name, min_clients=self.min_clients)

        # NOTE(review): function-scope import kept as in the original; it could likely move
        # next to the module-level `validate_ckpt` import — confirm there is no import cycle.
        from nvflare.recipe.utils import prepare_initial_ckpt

        # The path returned by the helper is what the persistor loads from.
        ckpt_path = prepare_initial_ckpt(self.initial_ckpt, job)
        persistor = LRModelPersistor(
            n_features=self.num_features,
            source_ckpt_file_full_name=ckpt_path,
        )
        persistor_id = job.to_server(persistor, id="lr_persistor")

        # Send custom controller to server
        controller = FedAvgLR(
            num_clients=self.min_clients,
            damping_factor=self.damping_factor,
            n_features=self.num_features,
            num_rounds=self.num_rounds,
            persistor_id=persistor_id,
        )
        job.to(controller, "server")

        # Every client runs the same training script through a ScriptRunner.
        runner = ScriptRunner(
            script=self.train_script,
            script_args=self.train_args,
            launch_external_process=self.launch_external_process,
            command=self.command,
            framework=FrameworkType.RAW,
            server_expected_format=ExchangeFormat.RAW,
            params_transfer_type=TransferType.FULL,
            memory_gc_rounds=self.client_memory_gc_rounds,
            cuda_empty_cache=self.cuda_empty_cache,
        )
        job.to_clients(runner)

        super().__init__(job)