写点什么

架构师训练营 W5 作业

用户头像
Kun
关注
发布于: 2020 年 07 月 07 日
架构师训练营 W5 作业



  • 用你熟悉的编程语言实现一致性 hash 算法。

# Python3
from bisect import insort_left, bisect
from hashlib import md5
from collections import defaultdict
class ConsistentHashing:
def __init__(self, nodes, vnodes_num=200):
self.nodes = set() # store physical nodes
self.vnodes_num = vnodes_num # num of each physical node's virtuals nodes
self.vnode_to_node = {} # mapping vnode to node
self.ring = {} # mapping hashcode to vnode
self._sorted_hashcode = [] # sorted hashcode of all vnodes
for node in nodes:
self.add_node(node)
def add_node(self, node_name):
self.nodes.add(node_name)
self._generate_vnode(node_name)
# generate virtual nodes and hash them to the ring
def _generate_vnode(self, node):
for i in range(self.vnodes_num):
vnode = node + '_' + str(i)
self.vnode_to_node[vnode] = node
vnode_hashcode = self._hash(vnode)
self.ring[vnode_hashcode] = vnode
insort_left(self._sorted_hashcode, vnode_hashcode)
# remove virtual nodes and remove the hashcodes in the ring
def _remove_vnode(self, node):
for vnode in self.vnode_to_node:
if vnode.startswith(node):
del self.vnode_to_node[vnode]
vnode_hashcode = self._hash(vnode)
del self.ring[vnode_hashcode]
self._sorted_hashcode.remove(vnode_hashcode)
def delete_node(self, node_name):
self.nodes.remove(node_name)
self._remove_vnode(node_name)
def get_node_by_key(self, key):
key_hashcode = self._hash(key)
pos = bisect(self._sorted_hashcode, key_hashcode)
# handle the last pos, should be the first since it's a ring
if pos == len(self._sorted_hashcode):
pos = 0
return self.vnode_to_node[self.ring[self._sorted_hashcode[pos]]]
def _hash(self, s):
return int(md5(s.encode("utf-8")).hexdigest(), 16)
def std(nums):
var = 0
avg = sum(nums) / len(nums)
for n in nums:
var += (n - avg) ** 2
return (var / len(nums)) ** 0.5
def cal(nodes_num, vnodes_num):
print("testing {} nodes, each nodes with {} virtual nodes".format(
nodes_num, vnodes_num))
NUMS_SAMPLE = 1000000
nodes = ['node-' + str(i + 1) for i in range(nodes_num)]
ch = ConsistentHashing(nodes, vnodes_num)
keys = [str(i) for i in range(NUMS_SAMPLE)]
vnodes_counter = defaultdict(list)
for k in keys:
node = ch.get_node_by_key(k)
vnodes_counter[node].append(node)
nums = []
for node, vnodes in sorted(vnodes_counter.items(), key=lambda x: (x[0], x[1])):
print(node, len(vnodes), '{}%'.format(len(vnodes) * 100 / NUMS_SAMPLE))
nums.append(len(vnodes))
print("the standard deviation is: {}".format(str(std(nums))))
print("----------------------------------------------------")
if __name__ == "__main__":
cal(10, 50)
cal(10, 100)
cal(10, 150)
cal(10, 200)
cal(10, 500)
cal(10, 1000)



  • 编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。



testing 10 nodes, each nodes with 50 virtual nodes

node-1 122268 12.2268%

node-10 97594 9.7594%

node-2 84704 8.4704%

node-3 104373 10.4373%

node-4 89792 8.9792%

node-5 97917 9.7917%

node-6 86536 8.6536%

node-7 79076 7.9076%

node-8 103683 10.3683%

node-9 134057 13.4057%

the standard deviation is: 16296.694045112341

----------------------------------------------------

testing 10 nodes, each nodes with 100 virtual nodes

node-1 115116 11.5116%

node-10 105172 10.5172%

node-2 101870 10.187%

node-3 97274 9.7274%

node-4 88694 8.8694%

node-5 86765 8.6765%

node-6 100667 10.0667%

node-7 97573 9.7573%

node-8 101483 10.1483%

node-9 105386 10.5386%

the standard deviation is: 7789.129476392082

----------------------------------------------------

testing 10 nodes, each nodes with 150 virtual nodes

node-1 114112 11.4112%

node-10 92841 9.2841%

node-2 99754 9.9754%

node-3 104443 10.4443%

node-4 97979 9.7979%

node-5 99120 9.912%

node-6 101977 10.1977%

node-7 87745 8.7745%

node-8 89603 8.9603%

node-9 112426 11.2426%

the standard deviation is: 8316.586258796333

----------------------------------------------------

testing 10 nodes, each nodes with 200 virtual nodes

node-1 100729 10.0729%

node-10 103588 10.3588%

node-2 110177 11.0177%

node-3 102767 10.2767%

node-4 93552 9.3552%

node-5 91928 9.1928%

node-6 99610 9.961%

node-7 99157 9.9157%

node-8 86060 8.606%

node-9 112432 11.2432%

the standard deviation is: 7623.0418075726175

----------------------------------------------------

testing 10 nodes, each nodes with 500 virtual nodes

node-1 94956 9.4956%

node-10 106153 10.6153%

node-2 107295 10.7295%

node-3 103596 10.3596%

node-4 98780 9.878%

node-5 95067 9.5067%

node-6 103303 10.3303%

node-7 101199 10.1199%

node-8 92065 9.2065%

node-9 97586 9.7586%

the standard deviation is: 4862.216634416858

----------------------------------------------------

testing 10 nodes, each nodes with 1000 virtual nodes

node-1 100301 10.0301%

node-10 103702 10.3702%

node-2 104070 10.407%

node-3 103183 10.3183%

node-4 96991 9.6991%

node-5 100223 10.0223%

node-6 98459 9.8459%

node-7 100608 10.0608%

node-8 98019 9.8019%

node-9 94444 9.4444%

the standard deviation is: 2951.8374955271506

----------------------------------------------------



可以看出,虚拟节点数量增加,负载的均衡越好。

发布于: 2020 年 07 月 07 日阅读数: 76
用户头像

Kun

关注

Life is short. 2018.01.13 加入

Software Developer

评论

发布
暂无评论
架构师训练营 W5 作业