Source code for nvflare.fuel.utils.hash_utils
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import hashlib
from nvflare.fuel.utils.validation_utils import check_number_range, check_str
# A large prime number as virtual hash table size
PRIME = 100003
MAX_NUM_BUCKETS = 64 * 1024
[docs]
class UniformHash:
"""A hash algorithm with uniform distribution. It achieves this with following steps,
1. Get a hash value of the key using first 8 bytes of SHA256
2. Map the hash value to a virtual hash table using modulo operation
3. Map the virtual bucket to real bucket by even distribution
The virtual hash is needed because real hash table size may not be a prime number and
there are known issues of modulo operation.
"""
def __init__(self, num_buckets: int):
"""
Initialize the hash function
Args:
num_buckets: Number of buckets, e.g. Number of servers to distribute the load
"""
check_number_range("num_buckets", num_buckets, 1, MAX_NUM_BUCKETS)
self.num_buckets = num_buckets
self.virtual_hashes_per_bucket = PRIME // num_buckets
[docs]
def get_num_buckets(self) -> int:
"""
Get the number of buckets
Returns:
Number of buckets
"""
return self.num_buckets
[docs]
def hash(self, key: str) -> int:
"""
Hash the key to a bucket index
Args:
key: A string key to be hashed
Returns:
The bucket index between 0 and num_buckets-1
"""
check_str("key", key)
# Step 1, calculate hash value using first 8 bytes of SHA256
sha_bytes = hashlib.sha256(key.encode()).digest()
sha = int.from_bytes(sha_bytes[:8], "big")
# Step 2, map the hash value to a virtual hash table whose size is PRIME using modulo operation
virtual_hash = sha % PRIME
# Step 3, evenly distribute the virtual hash to real buckets
# n is a float number representing the bucket index
n = virtual_hash / self.virtual_hashes_per_bucket
if n < self.num_buckets:
index = int(n)
else:
# The last bucket may have more virtual hashes than others
# Evenly spread the extra to the first few buckets
index = virtual_hash % self.num_buckets
return index