# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import sys
import time
from contextlib import contextmanager
import nvflare
from nvflare.tool.cli_output import output_error, output_error_message, output_ok, output_usage_error
from nvflare.tool.cli_session import add_startup_kit_selection_args
# Names of the sub-commands registered under the "system" command parser.
CMD_SYSTEM_STATUS = "status"
CMD_SYSTEM_RESOURCES = "resources"
CMD_SYSTEM_SHUTDOWN = "shutdown"
CMD_SYSTEM_RESTART = "restart"
CMD_SYSTEM_DISABLE_CLIENT = "disable-client"
CMD_SYSTEM_ENABLE_CLIENT = "enable-client"
CMD_SYSTEM_VERSION = "version"
CMD_SYSTEM_LOG_CONFIG = "log-config"

# Default for the --timeout option of the shutdown and restart sub-commands, in seconds.
_DEFAULT_SYSTEM_STATE_CHANGE_TIMEOUT = 30.0

# Sub-command name -> argparse parser; filled in by def_system_cli_parser() and read by the
# command handlers when they process the --schema flag.
_system_sub_cmd_parsers = {}
def _positive_float(value: str) -> float:
try:
parsed = float(value)
except ValueError as e:
raise argparse.ArgumentTypeError(f"expected a positive number, got {value!r}") from e
if parsed <= 0:
raise argparse.ArgumentTypeError(f"expected a positive number, got {value!r}")
return parsed
def _get_system_state_change_timeout(args) -> float:
    """Return the state-change timeout carried by ``args`` as a float.

    The module default is returned when ``args`` has no ``timeout`` attribute, or when the
    attribute is a bool or is not an int/float.
    """
    candidate = getattr(args, "timeout", _DEFAULT_SYSTEM_STATE_CHANGE_TIMEOUT)
    # bool is a subclass of int, so it has to be excluded explicitly.
    is_number = isinstance(candidate, (int, float)) and not isinstance(candidate, bool)
    return float(candidate) if is_number else _DEFAULT_SYSTEM_STATE_CHANGE_TIMEOUT
[docs]
def def_system_cli_parser(system_parser):
    """Add all ``system`` sub-commands to ``system_parser``.

    ``system_parser`` is already created in cli.py; this function only attaches the
    sub-parsers. Every sub-command also receives the startup-kit selection arguments and a
    ``--schema`` flag, in that order, after its own arguments.

    Returns:
        The module-level dict mapping each sub-command name to its parser.
    """
    subparsers = system_parser.add_subparsers(title="system subcommands", metavar="", dest="system_sub_cmd")

    def _finalize(cmd_name, cmd_parser):
        # Shared trailing options, then record the parser for later lookup by the handlers.
        add_startup_kit_selection_args(cmd_parser)
        cmd_parser.add_argument("--schema", action="store_true")
        _system_sub_cmd_parsers[cmd_name] = cmd_parser

    # status, resources: optional target followed by optional client names
    for cmd_name, help_text in (
        (CMD_SYSTEM_STATUS, "show server and client status"),
        (CMD_SYSTEM_RESOURCES, "show server and client resource usage"),
    ):
        cmd_parser = subparsers.add_parser(cmd_name, help=help_text)
        cmd_parser.add_argument("target", nargs="?", choices=["server", "client"], default=None)
        cmd_parser.add_argument("client_names", nargs="*", default=[])
        _finalize(cmd_name, cmd_parser)

    # shutdown, restart: mandatory target, confirmation/wait controls
    for cmd_name, help_text, timeout_help in (
        (CMD_SYSTEM_SHUTDOWN, "shut down server, clients, or all", "seconds to wait for shutdown completion"),
        (CMD_SYSTEM_RESTART, "restart server, clients, or all", "seconds to wait for restart completion"),
    ):
        cmd_parser = subparsers.add_parser(cmd_name, help=help_text)
        cmd_parser.add_argument("target", choices=["server", "client", "all"])
        cmd_parser.add_argument("client_names", nargs="*", default=[])
        cmd_parser.add_argument("--force", action="store_true")
        cmd_parser.add_argument("--no-wait", dest="no_wait", action="store_true")
        cmd_parser.add_argument(
            "--timeout",
            type=_positive_float,
            default=_DEFAULT_SYSTEM_STATE_CHANGE_TIMEOUT,
            help=timeout_help,
        )
        _finalize(cmd_name, cmd_parser)

    # disable-client, enable-client: a single client name
    for cmd_name, help_text, name_help in (
        (CMD_SYSTEM_DISABLE_CLIENT, "disable a client from reconnecting", "name of the client to disable"),
        (CMD_SYSTEM_ENABLE_CLIENT, "enable a disabled client to reconnect", "name of the client to enable"),
    ):
        cmd_parser = subparsers.add_parser(cmd_name, help=help_text)
        cmd_parser.add_argument("client_name", help=name_help)
        cmd_parser.add_argument("--force", action="store_true")
        _finalize(cmd_name, cmd_parser)

    # version
    version_parser = subparsers.add_parser(CMD_SYSTEM_VERSION, help="show NVFlare version on each remote site")
    version_parser.add_argument("--site", default="all", help="server, a client name, or all")
    _finalize(CMD_SYSTEM_VERSION, version_parser)

    # log-config
    log_parser = subparsers.add_parser(
        CMD_SYSTEM_LOG_CONFIG, help="change logging level on server or client sites"
    )
    log_parser.add_argument(
        "level",
        nargs="?",
        default=None,
        help="DEBUG, INFO, WARNING, ERROR, CRITICAL, concise, msg_only, full, verbose, reload",
    )
    log_parser.add_argument("--site", default="all", help="server, a client name, or all")
    _finalize(CMD_SYSTEM_LOG_CONFIG, log_parser)

    return _system_sub_cmd_parsers
def _confirm_or_force(prompt, args):
"""Prompt for confirmation unless --force is set."""
if args.force:
return
if not sys.stdin.isatty():
output_error(
"INVALID_ARGS",
exit_code=4,
detail="non-interactive mode requires --force",
)
raise SystemExit(4)
from nvflare.tool.cli_output import prompt_yn
if not prompt_yn(prompt):
raise SystemExit(0)
def _get_system_session(args=None):
    """Open a CLI session for ``args`` using the configured connect timeout.

    A ``ValueError`` from session creation is reported as STARTUP_KIT_MISSING (including
    the exception's ``hint`` attribute when it has one) and the process exits with code 4.
    """
    from nvflare.tool.cli_output import get_connect_timeout
    from nvflare.tool.cli_session import new_cli_session_for_args

    try:
        session = new_cli_session_for_args(args=args, timeout=get_connect_timeout())
    except ValueError as err:
        output_error(
            "STARTUP_KIT_MISSING",
            exit_code=4,
            detail=str(err),
            hint=getattr(err, "hint", None),
        )
        raise SystemExit(4)
    return session
@contextmanager
def _system_session(args=None):
    """Context manager yielding the session from ``_get_system_session(args)``.

    The session is closed when the ``with`` body finishes, whether or not it raised.
    """
    session = _get_system_session(args)
    try:
        yield session
    finally:
        # Guard against a session factory that returned None.
        if session is not None:
            session.close()
def _fmt_ts(ts):
if not ts:
return "unknown"
try:
return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(float(ts)))
except Exception:
return str(ts)
def _render_status_human(result, target_type):
    """Print the system status as human-readable text.

    Args:
        result: mapping read for the keys "server_status", "server_start_time", "jobs",
            "clients" and "client_status". Entries of the three lists that are not dicts
            are ignored.
        target_type: "all", "server" or "client". The server section (engine status and
            job table) is printed for "all"/"server", the client section for "all"/"client".

    Every table cell is converted to text before its width is measured, so a ``None`` or
    non-string name/fqcn no longer aborts the rendering, and the LAST CONNECT TIME column
    is sized to its content so the STATUS column lines up with its header.
    """
    from nvflare.tool.cli_output import print_human

    jobs = result.get("jobs") or []
    clients = result.get("clients") or []
    client_status = result.get("client_status") or []

    # On duplicate client names the last entry wins.
    client_info_by_name = {c.get("client_name"): c for c in clients if isinstance(c, dict)}
    client_status_by_name = {c.get("client_name"): c for c in client_status if isinstance(c, dict)}

    if target_type in ("all", "server"):
        print_human(f"Engine status: {result.get('server_status', 'unknown')}")
        print_human(f"Start time: {_fmt_ts(result.get('server_start_time'))}")

        job_rows = [
            (str(job.get("job_id", "?")), str(job.get("app_name", "?"))) for job in jobs if isinstance(job, dict)
        ]
        job_id_w = max([len("JOB_ID")] + [len(job_id) for job_id, _ in job_rows])
        app_name_w = max([len("APP NAME")] + [len(app_name) for _, app_name in job_rows])
        separator = f"+-{'-' * job_id_w}-+-{'-' * app_name_w}-+"
        print_human(separator)
        print_human(f"| {'JOB_ID':<{job_id_w}} | {'APP NAME':<{app_name_w}} |")
        print_human(separator)
        for job_id, app_name in job_rows:
            print_human(f"| {job_id:<{job_id_w}} | {app_name:<{app_name_w}} |")
        print_human(separator)

    if target_type in ("all", "client"):
        if target_type == "all":
            print_human(f"Registered clients: {len(clients)}")
        else:
            # A client-only query may carry only status entries; count those as a fallback.
            client_count = len(clients) if clients else len(client_status_by_name)
            print_human(f"Clients: {client_count}")

        if clients or client_status:
            print_human("")

            # Unique client names in first-seen order: registration info first, then status.
            names = []
            seen = set()
            for group in (clients, client_status):
                for c in group:
                    name = c.get("client_name") if isinstance(c, dict) else None
                    if name and name not in seen:
                        names.append(name)
                        seen.add(name)

            # Build the rows as plain strings first so widths and padding never see None.
            rows = []
            for name in names:
                info = client_info_by_name.get(name, {})
                raw_fqcn = info.get("fqcn", "-")
                fqcn = "-" if raw_fqcn is None else str(raw_fqcn)
                last_seen = _fmt_ts(info.get("client_last_conn_time")) if info else "-"
                status = client_status_by_name.get(name, {}).get("status", "unknown")
                rows.append((str(name), fqcn, last_seen, status))

            client_w = max([len("CLIENT")] + [len(row[0]) for row in rows])
            fqcn_w = max([len("FQCN")] + [len(row[1]) for row in rows])
            # Formatted timestamps are wider than the header text, so size to the content.
            last_seen_w = max([len("LAST CONNECT TIME")] + [len(row[2]) for row in rows])

            print_human(f"{'CLIENT':<{client_w}} {'FQCN':<{fqcn_w}} {'LAST CONNECT TIME':<{last_seen_w}} STATUS")
            for name, fqcn, last_seen, status in rows:
                print_human(f"{name:<{client_w}} {fqcn:<{fqcn_w}} {last_seen:<{last_seen_w}} {status}")
def _output_system_status(result, target_type):
    """Emit ``result`` through ``output_ok`` in JSON mode, otherwise render it as text."""
    from nvflare.tool.cli_output import is_json_mode

    if not is_json_mode():
        _render_status_human(result, target_type)
        return
    output_ok(result)
def _render_version_human(result):
    """Print the per-site version report as human-readable text.

    Reads the keys "sites", "admin_version", "compatible" and "mismatched_sites" from
    ``result``. Non-dict entries of "sites" are skipped. The compatibility summary is
    printed only when "compatible" is present (not None).
    """
    from nvflare.tool.cli_output import print_human

    site_entries = result.get("sites") or []
    compatible = result.get("compatible")
    mismatched = result.get("mismatched_sites")
    admin_version = result.get("admin_version", "unknown")

    print_human("Versions")
    print_human(f" {'SITE':<16} VERSION")
    for entry in site_entries:
        if not isinstance(entry, dict):
            continue
        print_human(f" {entry.get('site', '?'):<16} {entry.get('version', 'unknown')}")
    print_human("")
    print_human(f"Admin version: {admin_version}")

    if compatible is None:
        return
    print_human(f"Compatible: {'yes' if compatible else 'no'}")
    summary = ", ".join(mismatched) if mismatched else "none"
    print_human(f"Mismatched sites: {summary}")
def _output_system_version(result):
    """Emit ``result`` through ``output_ok`` in JSON mode, otherwise render it as text."""
    from nvflare.tool.cli_output import is_json_mode

    if not is_json_mode():
        _render_version_human(result)
        return
    output_ok(result)
def _is_no_client_response_error(e: Exception) -> bool:
return "no responses from clients" in str(e).lower()
[docs]
def cmd_system_status(args):
    """Handle ``nvflare system status``.

    Queries the status of ``args.target`` (default "all"), optionally restricted to
    ``args.client_names``, and prints it. ``AuthenticationError`` propagates to the caller.
    A ``NoConnection`` or a "no responses from clients" failure exits with code 2; any
    other exception is reported as INTERNAL_ERROR and exits with code 5.
    """
    from nvflare.fuel.flare_api.api_spec import AuthenticationError, NoConnection
    from nvflare.tool.cli_schema import handle_schema_flag

    handle_schema_flag(
        _system_sub_cmd_parsers.get(CMD_SYSTEM_STATUS),
        "nvflare system status",
        ["nvflare system status", "nvflare system status server"],
        sys.argv[1:],
    )

    target_type = getattr(args, "target", None) or "all"
    # An empty name list is passed on as None.
    requested_clients = getattr(args, "client_names", []) or None

    try:
        with _system_session(args) as session:
            status = session.check_status(target_type, requested_clients)
    except AuthenticationError:
        raise
    except NoConnection as err:
        output_error_message(
            "CONNECTION_FAILED",
            message="Could not connect to the FLARE server.",
            hint="Start the server or verify the admin startup kit endpoint.",
            exit_code=2,
            detail=str(err),
        )
        raise SystemExit(2)
    except Exception as err:
        if not _is_no_client_response_error(err):
            output_error("INTERNAL_ERROR", exit_code=5, detail=str(err))
            raise SystemExit(5)
        output_error_message(
            "SYSTEM_NOT_READY",
            message="FLARE system is not ready yet.",
            hint=(
                "Wait for clients to connect, then retry 'nvflare system status'. "
                "If this persists, check POC service logs or client logs."
            ),
            exit_code=2,
            detail="no responses from clients",
        )
        raise SystemExit(2)
    else:
        _output_system_status(status, target_type)
[docs]
def cmd_system_resources(args):
    """Handle ``nvflare system resources``.

    Requests a resource report for ``args.target`` (default "all"), optionally restricted
    to ``args.client_names``. ``AuthenticationError`` and ``NoConnection`` propagate; any
    other exception is reported as CONNECTION_FAILED and exits with code 2. An empty
    report prints "No resources specified." outside JSON mode.
    """
    from nvflare.fuel.flare_api.api_spec import AuthenticationError, NoConnection
    from nvflare.tool.cli_schema import handle_schema_flag

    handle_schema_flag(
        _system_sub_cmd_parsers.get(CMD_SYSTEM_RESOURCES),
        "nvflare system resources",
        ["nvflare system resources", "nvflare system resources client"],
        sys.argv[1:],
    )

    target_type = getattr(args, "target", None) or "all"
    # An empty name list is passed on as None.
    requested_clients = getattr(args, "client_names", []) or None

    try:
        with _system_session(args) as session:
            report = session.report_resources(target_type, requested_clients)
    except (AuthenticationError, NoConnection):
        raise
    except Exception as err:
        output_error("CONNECTION_FAILED", exit_code=2, detail=str(err))
        raise SystemExit(2)

    if report:
        output_ok(report)
        return

    from nvflare.tool.cli_output import is_json_mode, print_human

    if is_json_mode():
        # JSON consumers still get an envelope, even for an empty report.
        output_ok(report)
    else:
        print_human("No resources specified.")
[docs]
def cmd_system_shutdown(args):
    """Handle ``nvflare system shutdown``.

    Validates that client names are only given for target "client", asks for confirmation
    unless --force is set, then requests the shutdown. With --no-wait the call is made
    with ``wait=False``; otherwise the timeout from ``args`` is passed along.

    Exit codes on failure: 4 for invalid arguments or an invalid target, 3 on timeout,
    2 for any other exception. ``AuthenticationError`` and ``NoConnection`` propagate.
    """
    from nvflare.fuel.flare_api.api_spec import AuthenticationError, InvalidTarget, NoConnection
    from nvflare.tool.cli_schema import handle_schema_flag

    handle_schema_flag(
        _system_sub_cmd_parsers.get(CMD_SYSTEM_SHUTDOWN),
        "nvflare system shutdown",
        ["nvflare system shutdown all --force"],
        sys.argv[1:],
    )

    target = args.target
    client_names = getattr(args, "client_names", [])
    # Anything other than a real bool is treated as "wait for completion".
    fire_and_forget = getattr(args, "no_wait", False)
    if not isinstance(fire_and_forget, bool):
        fire_and_forget = False
    timeout = _get_system_state_change_timeout(args)

    if client_names and target != "client":
        output_error(
            "INVALID_ARGS",
            exit_code=4,
            detail=f"client_names are only valid for target 'client', got target='{target}'",
        )
        raise SystemExit(4)

    _confirm_or_force(f"Really shutdown {target}?", args)

    call_kwargs = {"client_names": client_names or None}
    if fire_and_forget:
        call_kwargs["wait"] = False
    else:
        call_kwargs["timeout"] = timeout

    try:
        with _system_session(args) as session:
            outcome = session.shutdown(target, **call_kwargs)
    except (AuthenticationError, NoConnection):
        raise
    except InvalidTarget as err:
        output_error("INVALID_ARGS", exit_code=4, detail=str(err))
        raise SystemExit(4)
    except TimeoutError as err:
        output_error_message(
            "TIMEOUT",
            message="System shutdown did not complete before the timeout.",
            hint=(
                "Increase --timeout, check system status, or use "
                "'nvflare system shutdown <target> --no-wait' for fire-and-forget shutdown."
            ),
            exit_code=3,
            detail=str(err),
        )
        raise SystemExit(3)
    except Exception as err:
        output_error("CONNECTION_FAILED", exit_code=2, detail=str(err))
        raise SystemExit(2)

    summary = {
        "target": target,
        "status": "shutdown_initiated" if fire_and_forget else "stopped",
        "timeout": None if fire_and_forget else timeout,
        "result": outcome,
    }
    output_ok(summary)
[docs]
def cmd_system_restart(args):
    """Handle ``nvflare system restart``.

    Validates that client names are only given for target "client", asks for confirmation
    unless --force is set, then requests the restart. With --no-wait the call is made
    with ``wait=False``; otherwise the timeout from ``args`` is passed along.

    Exit codes on failure: 4 for invalid arguments or an invalid target, 3 on timeout,
    2 for any other exception. ``AuthenticationError`` and ``NoConnection`` propagate.
    """
    from nvflare.fuel.flare_api.api_spec import AuthenticationError, InvalidTarget, NoConnection
    from nvflare.tool.cli_schema import handle_schema_flag

    handle_schema_flag(
        _system_sub_cmd_parsers.get(CMD_SYSTEM_RESTART),
        "nvflare system restart",
        ["nvflare system restart server --force"],
        sys.argv[1:],
    )

    target = args.target
    client_names = getattr(args, "client_names", [])
    # Anything other than a real bool is treated as "wait for completion".
    fire_and_forget = getattr(args, "no_wait", False)
    if not isinstance(fire_and_forget, bool):
        fire_and_forget = False
    timeout = _get_system_state_change_timeout(args)

    if client_names and target != "client":
        output_error(
            "INVALID_ARGS",
            exit_code=4,
            detail=f"client_names are only valid for target 'client', got target='{target}'",
        )
        raise SystemExit(4)

    _confirm_or_force(f"Really restart {target}?", args)

    call_kwargs = {"client_names": client_names or None}
    if fire_and_forget:
        call_kwargs["wait"] = False
    else:
        call_kwargs["timeout"] = timeout

    try:
        with _system_session(args) as session:
            outcome = session.restart(target, **call_kwargs)
    except (AuthenticationError, NoConnection):
        raise
    except InvalidTarget as err:
        output_error("INVALID_ARGS", exit_code=4, detail=str(err))
        raise SystemExit(4)
    except TimeoutError as err:
        output_error_message(
            "TIMEOUT",
            message="System restart did not complete before the timeout.",
            hint=(
                "Increase --timeout, check system status, or use "
                "'nvflare system restart <target> --no-wait' for fire-and-forget restart."
            ),
            exit_code=3,
            detail=str(err),
        )
        raise SystemExit(3)
    except Exception as err:
        output_error("CONNECTION_FAILED", exit_code=2, detail=str(err))
        raise SystemExit(2)

    summary = {
        "target": target,
        "status": "restart_initiated" if fire_and_forget else "restarted",
        "timeout": None if fire_and_forget else timeout,
        "result": outcome,
    }
    output_ok(summary)
[docs]
def cmd_system_version(args):
    """Handle ``nvflare system version``.

    ``args.site`` is "all" (default), "server", or a client name. For anything but
    "server" the known sites are looked up first; an unknown site is reported as
    SITE_NOT_FOUND and exits with code 1. Sites missing from the version report are
    listed as "unknown". When the server is part of the result, every listed site is
    compared against the server's version.

    ``AuthenticationError`` and ``NoConnection`` propagate; any other exception is
    reported as CONNECTION_FAILED and exits with code 2.
    """
    from nvflare.fuel.flare_api.api_spec import AuthenticationError, NoConnection
    from nvflare.tool.cli_schema import handle_schema_flag

    handle_schema_flag(
        _system_sub_cmd_parsers.get(CMD_SYSTEM_VERSION),
        "nvflare system version",
        ["nvflare system version", "nvflare system version --site server"],
        sys.argv[1:],
    )

    site = getattr(args, "site", "all")
    admin_version = getattr(nvflare, "__version__", "unknown")
    if site == "all":
        target_type = "all"
    elif site == "server":
        target_type = "server"
    else:
        target_type = "client"

    try:
        with _system_session(args) as session:
            if target_type == "server":
                known_sites = ["server"]
                targets = None
            else:
                system_info = session.get_system_info()
                known_sites = ["server"] + [client.name for client in system_info.client_info]
                if site != "all" and site not in known_sites:
                    output_error("SITE_NOT_FOUND", site=site)
                    raise SystemExit(1)
                targets = [site] if target_type == "client" else None
            raw_versions = session.report_version(target_type, targets)
    except (AuthenticationError, NoConnection):
        raise
    except Exception as err:
        output_error("CONNECTION_FAILED", exit_code=2, detail=str(err))
        raise SystemExit(2)

    versions = []
    for site_name in known_sites if site == "all" else [site]:
        payload = raw_versions.get(site_name, {}) if isinstance(raw_versions, dict) else {}
        reported = payload.get("version") if isinstance(payload, dict) else None
        versions.append({"site": site_name, "version": reported or "unknown"})

    result = {"sites": versions, "admin_version": admin_version}

    # The server's version is the reference; without it no compatibility verdict is given.
    server_version = None
    for item in versions:
        if item["site"] == "server":
            server_version = item["version"]
            break

    if server_version is not None:
        mismatched = [item["site"] for item in versions if item["version"] != server_version]
        result["compatible"] = not mismatched
        result["mismatched_sites"] = mismatched

    _output_system_version(result)
[docs]
def cmd_system_log(args):
    """Handle ``nvflare system log-config``.

    Applies ``args.level`` to ``args.site`` (default "all"). A missing level is reported
    as a usage error (LOG_CONFIG_INVALID) and exits with code 4. ``AuthenticationError``
    and ``NoConnection`` propagate; any other exception is reported as CONNECTION_FAILED
    and exits with code 2.
    """
    from nvflare.fuel.flare_api.api_spec import AuthenticationError, NoConnection
    from nvflare.tool.cli_schema import handle_schema_flag

    handle_schema_flag(
        _system_sub_cmd_parsers.get(CMD_SYSTEM_LOG_CONFIG),
        "nvflare system log-config",
        [
            "nvflare system log-config DEBUG",
            "nvflare system log-config concise",
        ],
        sys.argv[1:],
    )

    level = getattr(args, "level", None)
    site = getattr(args, "site", "all")

    if not level:
        log_parser = _system_sub_cmd_parsers.get(CMD_SYSTEM_LOG_CONFIG)
        # The parser is withheld from the usage error when --schema was requested.
        usage_parser = None if getattr(args, "schema", False) else log_parser
        output_usage_error(
            usage_parser,
            "specify a log level or mode",
            exit_code=4,
            error_code="LOG_CONFIG_INVALID",
            message="Log config is not a recognised log mode.",
            hint="Supply one of: DEBUG, INFO, WARNING, ERROR, CRITICAL, concise, msg_only, full, verbose, reload.",
        )
        raise SystemExit(4)

    try:
        with _system_session(args) as session:
            session.configure_site_log(level, target=site)
    except (AuthenticationError, NoConnection):
        raise
    except Exception as err:
        output_error("CONNECTION_FAILED", exit_code=2, detail=str(err))
        raise SystemExit(2)

    output_ok({"site": site, "log_config": level, "status": "applied"})
def _first_client_result(result: dict, client_name: str, state: str, rejoin_allowed: bool) -> dict:
clients = result.get("clients") if isinstance(result, dict) else None
if clients and isinstance(clients, list):
first = clients[0]
if isinstance(first, dict):
return first
return {
"client_name": client_name,
"state": state,
"credential_revoked": False,
"rejoin_allowed": rejoin_allowed,
}
[docs]
def cmd_system_disable_client(args):
    """Handle ``nvflare system disable-client``.

    Asks for confirmation unless --force is set, disables ``args.client_name`` and
    outputs the resulting client record. ``AuthenticationError`` and ``NoConnection``
    propagate; any other exception is reported as CONNECTION_FAILED and exits with code 2.
    """
    from nvflare.fuel.flare_api.api_spec import AuthenticationError, NoConnection
    from nvflare.tool.cli_schema import handle_schema_flag

    handle_schema_flag(
        _system_sub_cmd_parsers.get(CMD_SYSTEM_DISABLE_CLIENT),
        "nvflare system disable-client",
        ["nvflare system disable-client site-1 --force"],
        sys.argv[1:],
    )

    client_name = args.client_name
    _confirm_or_force(f"Really disable client '{client_name}'?", args)

    try:
        with _system_session(args) as session:
            reply = session.disable_client(client_name)
    except (AuthenticationError, NoConnection):
        raise
    except Exception as err:
        output_error("CONNECTION_FAILED", exit_code=2, detail=str(err))
        raise SystemExit(2)

    record = _first_client_result(reply, client_name, "disabled", False)
    output_ok(record)
[docs]
def cmd_system_enable_client(args):
    """Handle ``nvflare system enable-client``.

    Asks for confirmation unless --force is set, enables ``args.client_name`` and
    outputs the resulting client record. ``AuthenticationError`` and ``NoConnection``
    propagate; any other exception is reported as CONNECTION_FAILED and exits with code 2.
    """
    from nvflare.fuel.flare_api.api_spec import AuthenticationError, NoConnection
    from nvflare.tool.cli_schema import handle_schema_flag

    handle_schema_flag(
        _system_sub_cmd_parsers.get(CMD_SYSTEM_ENABLE_CLIENT),
        "nvflare system enable-client",
        ["nvflare system enable-client site-1 --force"],
        sys.argv[1:],
    )

    client_name = args.client_name
    _confirm_or_force(f"Really enable client '{client_name}'?", args)

    try:
        with _system_session(args) as session:
            reply = session.enable_client(client_name)
    except (AuthenticationError, NoConnection):
        raise
    except Exception as err:
        output_error("CONNECTION_FAILED", exit_code=2, detail=str(err))
        raise SystemExit(2)

    record = _first_client_result(reply, client_name, "enabled", True)
    output_ok(record)
# Dispatch table used by handle_system_cmd(): sub-command name -> handler taking the parsed args.
_system_handlers = {
    CMD_SYSTEM_STATUS: cmd_system_status,
    CMD_SYSTEM_RESOURCES: cmd_system_resources,
    CMD_SYSTEM_SHUTDOWN: cmd_system_shutdown,
    CMD_SYSTEM_RESTART: cmd_system_restart,
    CMD_SYSTEM_DISABLE_CLIENT: cmd_system_disable_client,
    CMD_SYSTEM_ENABLE_CLIENT: cmd_system_enable_client,
    CMD_SYSTEM_VERSION: cmd_system_version,
    CMD_SYSTEM_LOG_CONFIG: cmd_system_log,
}
[docs]
def handle_system_cmd(args):
    """Dispatch to the handler registered for ``args.system_sub_cmd``.

    Raises:
        CLIUnknownCmdException: when no sub-command was given or it has no handler.
    """
    requested = getattr(args, "system_sub_cmd", None)
    if requested is None:
        from nvflare.cli_unknown_cmd_exception import CLIUnknownCmdException

        raise CLIUnknownCmdException("system subcommand required")

    run = _system_handlers.get(requested)
    if run is not None:
        run(args)
        return

    from nvflare.cli_unknown_cmd_exception import CLIUnknownCmdException

    raise CLIUnknownCmdException(f"Unknown system subcommand: {requested}")