# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from abc import ABC, abstractmethod
from nvflare.apis.fl_context import FLContext
from nvflare.apis.overseer_spec import SP
from nvflare.fuel.utils.log_utils import get_module_logger
# Keys of the response dicts built in ServerState and returned by the state methods.
ACTION = "_action"
MESSAGE = "_message"
# Values stored under the ACTION key of those response dicts.
NIS = "Not In Service"
ABORT_RUN = "Abort Run"
SERVICE = "In Service"
class ServiceSession:
    """Plain record of a service endpoint and its session id."""

    def __init__(self, host: str = "", port: str = "", ssid: str = "") -> None:
        """Store the given values on the instance.

        Args:
            host: kept as ``self.host``.
            port: kept as ``self.service_port`` (note the attribute name differs
                from the parameter name).
            ssid: kept as ``self.ssid``.
        """
        self.host = host
        self.service_port = port
        self.ssid = ssid
class ServerState(ABC):
    """Abstract base for the server's service states.

    A concrete state answers ``register``, ``heartbeat``, ``get_task``,
    ``submit_result`` and ``aux_communicate`` with one of the response dicts
    defined below, and picks the state to use next in ``handle_sd_callback``.
    """

    # Response dicts shared by all states; the state methods return these
    # very objects (not copies).
    NOT_IN_SERVICE = {ACTION: NIS, MESSAGE: "Server not in service"}
    ABORT_CURRENT_RUN = {ACTION: ABORT_RUN, MESSAGE: "Abort current run"}
    IN_SERVICE = {ACTION: SERVICE, MESSAGE: "Server in service"}

    # Built once while this class body executes; subclasses inherit it.
    logger = get_module_logger(__module__, __qualname__)

    def __init__(self, host: str = "", port: str = "", ssid: str = "") -> None:
        """Record the server's own endpoint and session id.

        Args:
            host: kept as ``self.host``.
            port: kept as ``self.service_port``.
            ssid: kept as ``self.ssid``.

        ``self.primary`` always starts out as ``False``.
        """
        self.host = host
        self.service_port = port
        self.ssid = ssid
        self.primary = False

    @abstractmethod
    def register(self, fl_ctx: FLContext) -> dict:
        """Return the response dict for a register call in this state."""

    @abstractmethod
    def heartbeat(self, fl_ctx: FLContext) -> dict:
        """Return the response dict for a heartbeat call in this state."""

    @abstractmethod
    def get_task(self, fl_ctx: FLContext) -> dict:
        """Return the response dict for a get_task call in this state."""

    @abstractmethod
    def submit_result(self, fl_ctx: FLContext) -> dict:
        """Return the response dict for a submit_result call in this state."""

    @abstractmethod
    def aux_communicate(self, fl_ctx: FLContext) -> dict:
        """Return the response dict for an aux_communicate call in this state."""

    @abstractmethod
    def handle_sd_callback(self, sp: SP, fl_ctx: FLContext) -> ServerState:
        """Return the state to use after being notified about ``sp``."""
class ColdState(ServerState):
    """State that answers every request with ``NOT_IN_SERVICE``.

    ``handle_sd_callback`` switches to ``Cold2HotState`` once the primary SP
    it is told about has this server's host and port.
    """

    def register(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.NOT_IN_SERVICE``."""
        return ServerState.NOT_IN_SERVICE

    def heartbeat(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.NOT_IN_SERVICE``."""
        return ServerState.NOT_IN_SERVICE

    def get_task(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.NOT_IN_SERVICE``."""
        return ServerState.NOT_IN_SERVICE

    def submit_result(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.NOT_IN_SERVICE``."""
        return ServerState.NOT_IN_SERVICE

    def aux_communicate(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.NOT_IN_SERVICE``."""
        return ServerState.NOT_IN_SERVICE

    def handle_sd_callback(self, sp: SP, fl_ctx: FLContext) -> ServerState:
        """Decide the next state from the SP passed in.

        Args:
            sp: the SP being announced; may be falsy.
            fl_ctx: not used here.

        Returns:
            A new ``Cold2HotState`` when ``sp`` is primary and its name/fl_port
            equal this state's host/service_port; otherwise ``self``.
        """
        # Nothing to act on without an SP explicitly flagged primary.
        if not sp or sp.primary is not True:
            return self

        is_this_server = sp.name == self.host and sp.fl_port == self.service_port
        if not is_this_server:
            # A different server is primary.
            self.primary = False
            return self

        self.primary = True
        self.ssid = sp.service_session_id
        self.logger.info(
            f"Got the primary sp: {sp.name} fl_port: {sp.fl_port} SSID: {sp.service_session_id}. "
            f"Turning to hot."
        )
        return Cold2HotState(host=self.host, port=self.service_port, ssid=sp.service_session_id)
class Cold2HotState(ServerState):
    """State returned by ``ColdState`` when this server becomes primary.

    Only ``register`` reports ``IN_SERVICE``; ``heartbeat`` reports
    ``NOT_IN_SERVICE`` and the task/result/aux calls report
    ``ABORT_CURRENT_RUN``. SD callbacks leave the state unchanged.
    """

    def register(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.IN_SERVICE``."""
        return ServerState.IN_SERVICE

    def heartbeat(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.NOT_IN_SERVICE``."""
        return ServerState.NOT_IN_SERVICE

    def get_task(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.ABORT_CURRENT_RUN``."""
        return ServerState.ABORT_CURRENT_RUN

    def submit_result(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.ABORT_CURRENT_RUN``."""
        return ServerState.ABORT_CURRENT_RUN

    def aux_communicate(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.ABORT_CURRENT_RUN``."""
        return ServerState.ABORT_CURRENT_RUN

    def handle_sd_callback(self, sp: SP, fl_ctx: FLContext) -> ServerState:
        """Ignore the notification and stay in this state."""
        return self
class HotState(ServerState):
    """State that answers every request with ``IN_SERVICE``.

    ``handle_sd_callback`` switches to ``Hot2ColdState`` when the primary SP
    it is told about is another server, or is this server under a different
    service session id.
    """

    def register(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.IN_SERVICE``."""
        return ServerState.IN_SERVICE

    def heartbeat(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.IN_SERVICE``."""
        return ServerState.IN_SERVICE

    def get_task(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.IN_SERVICE``."""
        return ServerState.IN_SERVICE

    def submit_result(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.IN_SERVICE``."""
        return ServerState.IN_SERVICE

    def aux_communicate(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.IN_SERVICE``."""
        return ServerState.IN_SERVICE

    def handle_sd_callback(self, sp: SP, fl_ctx: FLContext) -> ServerState:
        """Decide the next state from the SP passed in.

        Args:
            sp: the SP being announced; may be falsy.
            fl_ctx: not used here.

        Returns:
            ``self`` when ``sp`` is falsy, not primary, or is this server with
            an unchanged session id; otherwise a new ``Hot2ColdState``.
        """
        # Nothing to act on without an SP explicitly flagged primary.
        if not sp or sp.primary is not True:
            return self

        is_this_server = sp.name == self.host and sp.fl_port == self.service_port
        if not is_this_server:
            # Primary moved to another server; the new state gets no ssid.
            self.primary = False
            self.logger.info(
                f"Primary sp changed to: {sp.name} fl_port: {sp.fl_port} SSID: {sp.service_session_id}. "
                f"Turning to Cold"
            )
            return Hot2ColdState(host=self.host, port=self.service_port)

        self.primary = True
        if sp.service_session_id != self.ssid:
            # Still this server, but under a new service session id.
            self.ssid = sp.service_session_id
            self.logger.info(
                f"Primary sp changed to: {sp.name} fl_port: {sp.fl_port} SSID: {sp.service_session_id}. "
                f"Turning to Cold"
            )
            return Hot2ColdState(host=self.host, port=self.service_port, ssid=sp.service_session_id)

        return self
class Hot2ColdState(ServerState):
    """State returned by ``HotState`` when the primary SP changes.

    Every request is answered with ``NOT_IN_SERVICE`` and SD callbacks leave
    the state unchanged.
    """

    def register(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.NOT_IN_SERVICE``."""
        return ServerState.NOT_IN_SERVICE

    def heartbeat(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.NOT_IN_SERVICE``."""
        return ServerState.NOT_IN_SERVICE

    def get_task(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.NOT_IN_SERVICE``."""
        return ServerState.NOT_IN_SERVICE

    def submit_result(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.NOT_IN_SERVICE``."""
        return ServerState.NOT_IN_SERVICE

    def aux_communicate(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.NOT_IN_SERVICE``."""
        return ServerState.NOT_IN_SERVICE

    def handle_sd_callback(self, sp: SP, fl_ctx: FLContext) -> ServerState:
        """Ignore the notification and stay in this state."""
        return self
class ShutdownState(ServerState):
    """State that answers every request with ``NOT_IN_SERVICE``.

    SD callbacks leave the state unchanged, so no transition out of it is
    defined in this class.
    """

    def register(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.NOT_IN_SERVICE``."""
        return ServerState.NOT_IN_SERVICE

    def heartbeat(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.NOT_IN_SERVICE``."""
        return ServerState.NOT_IN_SERVICE

    def get_task(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.NOT_IN_SERVICE``."""
        return ServerState.NOT_IN_SERVICE

    def submit_result(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.NOT_IN_SERVICE``."""
        return ServerState.NOT_IN_SERVICE

    def aux_communicate(self, fl_ctx: FLContext) -> dict:
        """Always ``ServerState.NOT_IN_SERVICE``."""
        return ServerState.NOT_IN_SERVICE

    def handle_sd_callback(self, sp: SP, fl_ctx: FLContext) -> ServerState:
        """Ignore the notification and stay in this state."""
        return self