# Source code for nvflare.job_config.stats_job

# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List

from nvflare import FedJob, FilterType
from nvflare.app_common.abstract.statistics_spec import Statistics
from nvflare.app_common.executors.statistics.statistics_executor import StatisticsExecutor
from nvflare.app_common.filters.statistics_privacy_filter import StatisticsPrivacyFilter
from nvflare.app_common.statistics.histogram_bins_cleanser import HistogramBinsCleanser
from nvflare.app_common.statistics.json_stats_file_persistor import JsonStatsFileWriter
from nvflare.app_common.statistics.min_count_cleanser import MinCountCleanser
from nvflare.app_common.statistics.min_max_cleanser import AddNoiseToMinMax
from nvflare.app_common.workflows.statistics_controller import StatisticsController


class StatsJob(FedJob):
    """Job definition that wires up a federated statistics workflow.

    Constructing the job configures the server side immediately (a
    ``StatisticsController`` plus a ``JsonStatsFileWriter``).  Client sites
    are not configured until ``setup_client`` is called with the site names.

    Args:
        job_name: Name of the job; forwarded to ``FedJob`` as ``name`` and
            also kept on ``self.job_name``.
        statistic_configs: Statistics configuration handed to the
            ``StatisticsController``.
        stats_generator: ``Statistics`` implementation deployed to every
            client site.
        output_path: Output path handed to the ``JsonStatsFileWriter``.
        min_count: ``min_count`` setting of the ``MinCountCleanser``.
        min_noise_level: ``min_noise_level`` setting of ``AddNoiseToMinMax``.
        max_noise_level: ``max_noise_level`` setting of ``AddNoiseToMinMax``.
        max_bins_percent: ``max_bins_percent`` setting of the
            ``HistogramBinsCleanser``.
    """

    def __init__(
        self,
        job_name: str,
        statistic_configs: dict,
        stats_generator: Statistics,
        output_path: str,
        min_count: int = 10,
        min_noise_level=0.1,
        max_noise_level=0.3,
        max_bins_percent=10,
    ):
        # Forward the job name to FedJob; previously it was only stored on
        # this subclass, so the base class kept its default name.
        super().__init__(name=job_name)

        # Component ids used when registering objects with the job.
        self.writer_id = "stats_writer"
        self.stats_generator_id_prefix = "stats_generator"

        # User-supplied configuration.
        self.job_name = job_name
        self.stats_generator = stats_generator
        self.statistic_configs = statistic_configs
        self.output_path = output_path

        # Settings for the privacy cleansers added per client site.
        self.min_count = min_count
        self.min_noise_level = min_noise_level
        self.max_noise_level = max_noise_level
        self.max_bins_percent = max_bins_percent

        # The server side needs no site list, so it is configured right away.
        self.setup_server()

    def setup_server(self):
        """Add the statistics controller and the JSON stats writer to "server"."""
        controller = self.get_stats_controller()
        self.to(controller, "server")

        # The writer is registered under ``writer_id`` -- the same id the
        # controller is constructed with in ``get_stats_controller``.
        stats_writer = self.get_stats_output_writer()
        self.to(stats_writer, "server", id=self.writer_id)

    def setup_client(self, sites: List[str]):
        """Configure every listed client site for the statistics tasks.

        For each site this adds the stats generator, a ``StatisticsExecutor``
        bound to the generator id returned by ``self.to``, and the privacy
        result filters.

        Args:
            sites: Names of the client sites to configure.
        """
        for site_id in sites:
            stats_generator_id = self.to(self.stats_generator, site_id, id=self.stats_generator_id_prefix)
            executor = StatisticsExecutor(generator_id=stats_generator_id)
            self.to(executor, site_id, tasks=["fed_stats_pre_run", "fed_stats"])
            self.add_privacy_result_filters(site_id)

    def get_stats_controller(self) -> StatisticsController:
        """Build the ``StatisticsController`` for this job.

        Returns:
            A controller created from ``statistic_configs`` and ``writer_id``,
            with ``enable_pre_run_task`` set to ``False``.
        """
        return StatisticsController(
            statistic_configs=self.statistic_configs,
            writer_id=self.writer_id,
            enable_pre_run_task=False,
        )

    def get_stats_output_writer(self):
        """Build the ``JsonStatsFileWriter`` that targets ``output_path``.

        Returns:
            A writer configured with ``output_path`` and the ``ObjectEncoder``
            JSON encoder path.
        """
        json_encoder_path = "nvflare.app_common.utils.json_utils.ObjectEncoder"
        return JsonStatsFileWriter(output_path=self.output_path, json_encoder_path=json_encoder_path)

    def add_privacy_result_filters(self, site_id: str):
        """Attach the statistics privacy filter to the results of one site.

        Three cleansers (min count, min/max noise, histogram bins) are added
        to the site, and their ids are handed to a ``StatisticsPrivacyFilter``
        registered as a ``TASK_RESULT`` filter for the "fed_stats" task.

        Args:
            site_id: Name of the client site to add the filters to.
        """
        min_count_cleanser = MinCountCleanser(min_count=self.min_count)
        min_max_cleanser = AddNoiseToMinMax(
            min_noise_level=self.min_noise_level,
            max_noise_level=self.max_noise_level,
        )
        hist_bins_cleanser = HistogramBinsCleanser(max_bins_percent=self.max_bins_percent)

        # ``self.to`` returns the id each cleanser was registered under; the
        # filter refers to the cleansers by those ids.
        result_cleanser_ids = [
            self.to(min_count_cleanser, site_id, id="min_count_cleanser"),
            self.to(min_max_cleanser, site_id, id="min_max_noise_cleanser"),
            self.to(hist_bins_cleanser, site_id, id="hist_bins_cleanser"),
        ]

        result_filter = StatisticsPrivacyFilter(result_cleanser_ids=result_cleanser_ids)
        self.to(result_filter, site_id, filter_type=FilterType.TASK_RESULT, tasks=["fed_stats"])