# Source code for nvflare.app_opt.job_launcher.study_data

# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import logging
import re
from dataclasses import dataclass
from typing import Optional

import yaml

# Root directory that StudyDatasetMount.mount_path prefixes to "<study>/<dataset>".
DATA_MOUNT_ROOT = "/data"
# The only values accepted for a dataset entry's "mode" field.
MODE_RO = "ro"
MODE_RW = "rw"

# A study/dataset name: 1-63 characters drawn from lowercase ASCII letters and digits, with "_" and "-"
# permitted only in the interior (the first and last characters must be a letter or digit).
# NOTE: "$" also matches just before a trailing newline, so Pattern.match() would accept "name\n";
# Pattern.fullmatch() does not.
_VALID_PATH_COMPONENT = re.compile(r"^[a-z0-9](?:[a-z0-9_-]{0,61}[a-z0-9])?$")



# [docs] (documentation-link marker left over from the rendered source page)
@dataclass(frozen=True)
class StudyDatasetMount:
    """Immutable record describing one dataset mount of a study.

    Attributes:
        study: Study name; first path level below ``DATA_MOUNT_ROOT`` in ``mount_path``.
        dataset: Dataset name; second path level in ``mount_path``.
        source: Origin of the dataset data; stored as given and not interpreted by this class.
        mode: Access mode string; ``read_only`` compares it against ``MODE_RO``.
    """

    study: str
    dataset: str
    source: str
    mode: str

    @property
    def mount_path(self) -> str:
        """Return the mount location, ``DATA_MOUNT_ROOT/<study>/<dataset>``."""
        study_dir = f"{DATA_MOUNT_ROOT}/{self.study}"
        return f"{study_dir}/{self.dataset}"

    @property
    def read_only(self) -> bool:
        """Return True when ``mode`` equals ``MODE_RO``, otherwise False."""
        is_read_only = self.mode == MODE_RO
        return is_read_only



def _validate_path_component(value: str, label: str, file_path: str) -> None:
    """Ensure ``value`` can be used as a single study-data path component.

    Args:
        value: Candidate name (a study or dataset key). It must be a ``str`` that
            matches ``_VALID_PATH_COMPONENT`` in its entirety.
        label: Description of ``value`` that prefixes the error message.
        file_path: Path of the study data file; used only in the error message.

    Raises:
        ValueError: If ``value`` is not a string or is not fully matched by the pattern.
    """
    # fullmatch() instead of match(): the pattern ends with "$", which also matches just
    # before a trailing newline, so match() would let a name such as "abc\n" through.
    if not isinstance(value, str) or _VALID_PATH_COMPONENT.fullmatch(value) is None:
        raise ValueError(f"{label} {value!r} in '{file_path}' is not a valid study-data path component.")


def _log_warning(logger: Optional[logging.Logger], message: str, *args) -> None:
    if logger:
        logger.warning(message, *args)



# [docs] (documentation-link marker left over from the rendered source page)
def load_study_data_file(file_path: str, logger: Optional[logging.Logger] = None) -> dict:
    """Load a study data YAML file and validate its structure.

    The file maps ``study -> dataset -> {source, mode}``. Study and dataset names must be
    valid path components, ``source`` must be a non-empty string, and ``mode`` must be
    ``MODE_RO`` or ``MODE_RW``.

    Args:
        file_path: Path of the YAML file to read.
        logger: Optional logger that receives warnings (missing file, no study entries).
            When omitted, those warnings are dropped.

    Returns:
        The parsed mapping, unchanged, once it has passed validation. An empty dict is
        returned when the file does not exist or contains no YAML document.

    Raises:
        ValueError: If the file cannot be read, decoded, or parsed, or if its content does
            not follow the expected layout.
    """
    try:
        # Read as UTF-8 explicitly: without ``encoding`` the decoding depends on the
        # platform locale, so the same file could load on one host and fail on another.
        with open(file_path, "rt", encoding="utf-8") as f:
            study_data = yaml.safe_load(f)
    except FileNotFoundError:
        # A missing file is not an error: it simply means no mounts are configured.
        _log_warning(logger, "study data file '%s' was not found; no study data mounts will be configured", file_path)
        return {}
    except OSError as e:
        raise ValueError(f"Could not read study data file '{file_path}': {e}") from e
    except UnicodeDecodeError as e:
        # Already a ValueError subclass, but re-raise so the message names the offending file.
        raise ValueError(f"Could not read study data file '{file_path}': {e}") from e
    except yaml.YAMLError as e:
        raise ValueError(f"Could not parse study data file '{file_path}': {e}") from e

    # An empty YAML document parses to None; treat it like an empty mapping.
    if study_data is None:
        study_data = {}

    if not isinstance(study_data, dict):
        raise ValueError(f"file at study_data_file_path '{file_path}' does not contain a dictionary.")

    if not study_data:
        _log_warning(
            logger, "study data file '%s' has no study entries; no study data mounts will be configured", file_path
        )

    for study, datasets in study_data.items():
        _validate_path_component(study, "study name", file_path)
        if not isinstance(datasets, dict):
            raise ValueError(
                f"study_data.yaml uses study -> dataset -> {{source, mode}}; entry for study '{study}' "
                f"in '{file_path}' must be a dictionary."
            )
        for dataset, entry in datasets.items():
            _validate_path_component(dataset, "dataset name", file_path)
            if not isinstance(entry, dict):
                raise ValueError(
                    f"dataset entry '{study}/{dataset}' in '{file_path}' must be a dictionary with source and mode."
                )
            source = entry.get("source")
            if not isinstance(source, str) or not source:
                raise ValueError(f"dataset entry '{study}/{dataset}' in '{file_path}' must define a non-empty source.")
            mode = entry.get("mode")
            if mode not in (MODE_RO, MODE_RW):
                raise ValueError(f"dataset entry '{study}/{dataset}' in '{file_path}' must set mode to 'ro' or 'rw'.")

    return study_data




# [docs] (documentation-link marker left over from the rendered source page)
def should_mount_study_data(study: Optional[str]) -> bool:
    """Return True when ``study`` is truthy (a non-empty name), False for ``None`` or ``""``."""
    return True if study else False




# [docs] (documentation-link marker left over from the rendered source page)
def resolve_study_dataset_mounts(
    study_data: dict, study: str, file_path: str, logger: Optional[logging.Logger] = None
) -> list[StudyDatasetMount]:
    """Build the list of dataset mounts configured for ``study``.

    Args:
        study_data: Mapping of ``study -> dataset -> {source, mode}``.
        study: Name of the study whose datasets should be mounted.
        file_path: Path of the study data file; used only in warnings and error messages.
        logger: Optional logger for warnings; when omitted, warnings are dropped.

    Returns:
        One ``StudyDatasetMount`` per dataset of the study, in mapping order. The list is
        empty when the study has no entry or its entry has no datasets.

    Raises:
        ValueError: If the study has datasets but its name is not a valid path component.
    """
    study_entry = study_data.get(study)

    if study_entry is None:
        # A completely empty mapping is reported by load_study_data_file(), so only warn
        # here when other studies exist and this one is missing.
        if study_data:
            _log_warning(
                logger,
                "study data file '%s' has no entry for study '%s'; no study data mounts will be configured",
                file_path,
                study,
            )
        return []

    if not study_entry:
        _log_warning(
            logger,
            "study data file '%s' entry for study '%s' has no datasets; no study data mounts will be configured",
            file_path,
            study,
        )
        return []

    _validate_path_component(study, "study name", file_path)

    mounts = []
    for dataset_name, spec in study_entry.items():
        mounts.append(
            StudyDatasetMount(study=study, dataset=dataset_name, source=spec["source"], mode=spec["mode"])
        )
    return mounts