Week 5 作业 01
发布于: 2020 年 10 月 25 日
用你熟悉的编程语言实现一致性 hash 算法。
编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。
#!/usr/bin/env python
# encoding: utf-8
from hashlib import md5
from bintrees.rbtree import RBTree
import random
from pprint import pprint
import numpy
hash_size = pow(2,32)
def _not_exists(key):
return key is None or key == -1
# 找上限
def find_upper(root, elem):
if root is None:
return -1
if elem == root.key:
return root.key
elif elem < root.key:
maybe_max = find_upper(root.left, elem)
if _not_exists(maybe_max):
return root.key
return maybe_max
else:
maybe_max = find_upper(root.right, elem)
if _not_exists(maybe_max):
return -1
return maybe_max
# 找下限
def find_lower(root, elem):
if root is None:
return -1
if elem == root.key:
return root.key
elif elem < root.key:
maybe_min = find_lower(root.left, elem)
if _not_exists(maybe_min):
return -1
return maybe_min
else:
maybe_min = find_lower(root.right, elem)
if _not_exists(maybe_min):
return root.key
return maybe_min
def find_next(root, elem):
if root is None:
return None, None
return find_lower(root, elem), find_upper(root, elem)
def trace_tree(root):
if root:
print("{} {} {}".format(
root.key,
root.left.key if root.left is not None else None,
root.right.key if root.right is not None else None))
trace_tree(root.left)
trace_tree(root.right)
def get_mean_var_std(arr):
import numpy as np
#求均值
arr_mean = np.mean(arr)
#求方差
arr_var = np.var(arr)
#求标准差
arr_std = np.std(arr,ddof=1)
print("平均值为:%f" % arr_mean)
print("方差为:%f" % arr_var)
print("标准差为:%f" % arr_std)
return arr_mean ,arr_var ,arr_std
class CHNode(object):
def __init__(self, host_name, id=None, vhost=None):
self._id = id
self.host_name = host_name
if vhost:
self._hash_host_name = "{}#{}".format(host_name, vhost)
else:
self._hash_host_name = host_name
def get_id(self, ch_size):
if self._id:
return self._id
return int(md5(self._hash_host_name.encode("utf-8")).hexdigest(), 16) % ch_size
class ConsistHash(object):
def __init__(self, size=hash_size):
self.size = size # set consistent hash circul size
self.rbt = RBTree() # red black tree
def insert_host(self, host):
host_id = host.get_id(self.size)
self.rbt.insert(host_id, host)
def remove_host(self, host):
host_id = host.get_id(self.size)
self.rbt.remove(host_id)
@staticmethod
def _find_upper(root, elem):
if root is None:
return -1
if elem == root.key:
return root.key
elif elem < root.key:
maybe_max = find_upper(root.left, elem)
if _not_exists(maybe_max):
return root.key
return maybe_max
else:
maybe_max = find_upper(root.right, elem)
if _not_exists(maybe_max):
return -1
return maybe_max
def find_host(self, id):
id %= self.size
idx = self._find_upper(self.rbt._root, id)
if idx == -1: # id larger than max id
# assert tree is not empty
return self.rbt.min_item()[1]
return self.rbt.get_value(idx)
if __name__ == "__main__":
rbt = RBTree()
for i in range(0, 30, 2):
rbt.insert(i, "{}".format(i))
ch = ConsistHash()
arr_nodes = [["192.168.0.1"], ["192.168.0.2"], ["192.168.0.3"], ["192.168.0.4"], ["192.168.0.5"], ["192.168.0.6"], ["192.168.0.7"], ["192.168.0.8"], ["192.168.0.9"], ["192.168.0.10"] ]
arr_nodes2 = ["192.168.0.1", "192.168.0.2", "192.168.0.3", "192.168.0.4", "192.168.0.5", "192.168.0.6", "192.168.0.7", "192.168.0.8", "192.168.0.9", "192.168.0.10" ]
ch.insert_host(CHNode(arr_nodes[0][0]))
ch.insert_host(CHNode(arr_nodes[1][0]))
ch.insert_host(CHNode(arr_nodes[2][0]))
ch.insert_host(CHNode(arr_nodes[3][0]))
ch.insert_host(CHNode(arr_nodes[4][0]))
ch.insert_host(CHNode(arr_nodes[5][0]))
ch.insert_host(CHNode(arr_nodes[6][0]))
ch.insert_host(CHNode(arr_nodes[7][0]))
ch.insert_host(CHNode(arr_nodes[8][0]))
ch.insert_host(CHNode(arr_nodes[9][0]))
for i in range(1, 1000000):
search_elem = random.randint(1, hash_size)
nodes = ch.find_host(search_elem).host_name
#print("{} in {}".format(search_elem, nodes))
nodes = ch.find_host(search_elem).host_name
position = arr_nodes2.index(nodes)
arr_nodes[position].append(search_elem)
#pprint(arr_nodes)
arr_node_count = []
arr_node_count_dif = []
for i in range(len(arr_nodes)):
arr_nodes[i].pop(0)
arr_node_count.append(len(arr_nodes[i]))
print("Node:{} / Count:{} ".format(arr_nodes2[i], len(arr_nodes[i])))
get_mean_var_std(arr_node_count)
复制代码
划线
评论
复制
发布于: 2020 年 10 月 25 日阅读数: 29
Croesus
关注
还未添加个人签名 2019.01.05 加入
还未添加个人简介
评论