Source code for nvflare.app_opt.statistics.visualization.statistics_visualization

# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict

from nvflare.fuel.utils.import_utils import optional_import


def convert_data(feature_metrics) -> dict:
    """Flatten the site/dataset levels of one feature's metrics.

    Args:
        feature_metrics: nested mapping indexed as
            ``feature_metrics[statistic][site][dataset]``.

    Returns:
        A new dict indexed as ``result[statistic]["<site>-<dataset>"]`` whose
        values are the same objects found in ``feature_metrics``.
    """
    return {
        statistic: {
            f"{site}-{ds}": value
            for site, datasets in per_site.items()
            for ds, value in datasets.items()
        }
        for statistic, per_site in feature_metrics.items()
    }
class Visualization:
    """Show statistics results as pandas tables and line plots.

    The ``data`` arguments taken by the methods below are nested mappings
    indexed as ``data[feature][statistic][site][dataset]``.  For the
    ``"histogram"`` statistic each value is a sequence of buckets, of which
    ``bucket[0]`` is used as the plotted bin edge and ``bucket[2]`` as the
    sample count (``bucket[1]`` is not read here; presumably the upper bin
    edge -- confirm against the producer of the data).
    """

    def import_modules(self):
        """Load the optional ``IPython.display.display`` and ``pandas`` objects.

        Returns:
            The ``(display, pd)`` pair obtained from ``optional_import``.  When
            an import is reported as unsuccessful, the ``failure`` attribute of
            the returned object is printed and that object is still returned.
        """
        display, import_flag = optional_import(module="IPython.display", name="display")
        if not import_flag:
            print(display.failure)

        pd, import_flag = optional_import(module="pandas")
        if not import_flag:
            print(pd.failure)

        return display, pd

    def show_stats(self, data, white_list_features=None):
        """Print and display one table of statistics per feature.

        Args:
            data: statistics indexed as ``data[feature][statistic][site][dataset]``.
            white_list_features: features to show; ``None`` or empty shows
                every feature in ``data``.
        """
        if white_list_features is None:
            white_list_features = []

        display, pd = self.import_modules()
        target_features = self._get_target_features(list(data), white_list_features)
        for feature in target_features:
            print(f"\n{feature}\n")
            converted = convert_data(data[feature])
            display(pd.DataFrame.from_dict(converted))

    def show_histograms(self, data, display_format="sample_count", white_list_features=None, plot_type="both"):
        """Plot the histograms of the selected features.

        Args:
            data: statistics indexed as ``data[feature][statistic][site][dataset]``;
                each selected feature must contain a ``"histogram"`` statistic.
            display_format: ``"percent"`` plots each bucket as a fraction of
                its dataset's total count; any other value plots raw counts.
            white_list_features: features to plot; ``None`` or empty plots all.
            plot_type: ``"both"``, ``"main"`` or ``"subplot"``.
        """
        if white_list_features is None:
            white_list_features = []

        feature_dfs = self.get_histogram_dataframes(data, display_format, white_list_features)
        self.show_dataframe_plots(feature_dfs, plot_type)

    def show_dataframe_plots(self, feature_dfs, plot_type="both"):
        """Draw a line plot for every feature data frame.

        Args:
            feature_dfs: mapping of feature name to its data frame.
            plot_type: ``"main"`` draws one combined plot, ``"subplot"`` draws
                the ``subplots=True`` variant, ``"both"`` draws the two of
                them.  Any other value prints a message for each feature and
                draws nothing.
        """
        for feature, df in feature_dfs.items():
            if plot_type == "both":
                df.plot.line(rot=40, title=feature)
                df.plot.line(rot=40, subplots=True, title=feature)
            elif plot_type == "main":
                df.plot.line(rot=40, title=feature)
            elif plot_type == "subplot":
                df.plot.line(rot=40, subplots=True, title=feature)
            else:
                print(f"not supported plot type: '{plot_type}'")

    def get_histogram_dataframes(self, data, display_format="sample_count", white_list_features=None) -> Dict:
        """Build one histogram data frame per selected feature.

        Args:
            data: statistics indexed as ``data[feature][statistic][site][dataset]``.
            display_format: see :meth:`show_histograms`.
            white_list_features: features to include; ``None`` or empty
                includes all.

        Returns:
            Dict mapping feature name to a data frame built from that
            feature's ``"<site>-<dataset>"`` value lists, indexed by the bin
            edges taken from the feature's first histogram.
        """
        if white_list_features is None:
            white_list_features = []

        _, pd = self.import_modules()
        hists, edges = self._prepare_histogram_data(data, display_format, white_list_features)

        # ``edges`` already holds exactly the selected features, in selection
        # order, so there is no need to resolve the white list a second time.
        return {feature: pd.DataFrame(hists[feature], index=edges[feature]) for feature in edges}

    def _prepare_histogram_data(self, data, display_format="sample_count", white_list_features=None):
        """Extract plottable values and bin edges for the selected features.

        Returns:
            A ``(feature_hists, feature_edges)`` pair.  ``feature_hists[feature]``
            maps ``"<site>-<dataset>"`` to its list of bucket values and
            ``feature_edges[feature]`` lists ``bucket[0]`` of every bucket of
            the feature's first histogram.
        """
        if white_list_features is None:
            white_list_features = []

        target_features = self._get_target_features(list(data), white_list_features)

        feature_hists = {}
        feature_edges = {}
        for feature in target_features:
            histograms = convert_data(data[feature])["histogram"]
            feature_edges[feature] = []
            hists = {}
            for i, (ds, ds_hist) in enumerate(histograms.items()):
                if i == 0:
                    # Only the first dataset supplies the edges used as the
                    # data frame index for this feature.
                    feature_edges[feature] = [bucket[0] for bucket in ds_hist]
                hists[ds] = self._bucket_values(ds_hist, display_format)
            feature_hists[feature] = hists

        return feature_hists, feature_edges

    def _bucket_values(self, ds_hist, display_format):
        """Return the values to plot for the buckets of one histogram.

        For ``display_format == "percent"`` every count is divided by the
        histogram's total count; the total is computed once per histogram.
        A histogram whose counts sum to zero yields ``0.0`` for every bucket
        instead of raising ``ZeroDivisionError``.  Any other format returns
        the raw counts.
        """
        counts = [bucket[2] for bucket in ds_hist]
        if display_format != "percent":
            return counts

        total = self.sum_counts_in_histogram(ds_hist)
        if total == 0:
            return [0.0 for _ in counts]
        return [count / total for count in counts]

    def sum_counts_in_histogram(self, hist):
        """Return the sum of ``bucket[2]`` over all buckets in ``hist``."""
        return sum(bucket[2] for bucket in hist)

    def _get_target_features(self, all_features, white_list_features=None):
        """Return ``white_list_features`` when non-empty, else ``all_features``."""
        return white_list_features or all_features