Source code for nvflare.fuel.utils.hash_utils

# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import hashlib

from nvflare.fuel.utils.validation_utils import check_number_range, check_str

# A large prime number as virtual hash table size
PRIME = 100003
MAX_NUM_BUCKETS = 64 * 1024


[docs] class UniformHash: """A hash algorithm with uniform distribution. It achieves this with following steps, 1. Get a hash value of the key using first 8 bytes of SHA256 2. Map the hash value to a virtual hash table using modulo operation 3. Map the virtual bucket to real bucket by even distribution The virtual hash is needed because real hash table size may not be a prime number and there are known issues of modulo operation. """ def __init__(self, num_buckets: int): """ Initialize the hash function Args: num_buckets: Number of buckets, e.g. Number of servers to distribute the load """ check_number_range("num_buckets", num_buckets, 1, MAX_NUM_BUCKETS) self.num_buckets = num_buckets self.virtual_hashes_per_bucket = PRIME // num_buckets
[docs] def get_num_buckets(self) -> int: """ Get the number of buckets Returns: Number of buckets """ return self.num_buckets
[docs] def hash(self, key: str) -> int: """ Hash the key to a bucket index Args: key: A string key to be hashed Returns: The bucket index between 0 and num_buckets-1 """ check_str("key", key) # Step 1, calculate hash value using first 8 bytes of SHA256 sha_bytes = hashlib.sha256(key.encode()).digest() sha = int.from_bytes(sha_bytes[:8], "big") # Step 2, map the hash value to a virtual hash table whose size is PRIME using modulo operation virtual_hash = sha % PRIME # Step 3, evenly distribute the virtual hash to real buckets # n is a float number representing the bucket index n = virtual_hash / self.virtual_hashes_per_bucket if n < self.num_buckets: index = int(n) else: # The last bucket may have more virtual hashes than others # Evenly spread the extra to the first few buckets index = virtual_hash % self.num_buckets return index