写点什么

Week 5 作业 01

用户头像
Croesus
关注
发布于: 2020 年 10 月 25 日
  1. 用你熟悉的编程语言实现一致性 hash 算法。

  2. 编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。



#!/usr/bin/env python# encoding: utf-8

from hashlib import md5from bintrees.rbtree import RBTreeimport randomfrom pprint import pprintimport numpy
hash_size = pow(2,32)
def _not_exists(key): return key is None or key == -1

# 找上限def find_upper(root, elem): if root is None: return -1
if elem == root.key: return root.key elif elem < root.key: maybe_max = find_upper(root.left, elem) if _not_exists(maybe_max): return root.key return maybe_max else: maybe_max = find_upper(root.right, elem) if _not_exists(maybe_max): return -1 return maybe_max

# 找下限def find_lower(root, elem): if root is None: return -1
if elem == root.key: return root.key elif elem < root.key: maybe_min = find_lower(root.left, elem) if _not_exists(maybe_min): return -1 return maybe_min else: maybe_min = find_lower(root.right, elem) if _not_exists(maybe_min): return root.key return maybe_min

def find_next(root, elem): if root is None: return None, None return find_lower(root, elem), find_upper(root, elem)

def trace_tree(root): if root: print("{} {} {}".format( root.key, root.left.key if root.left is not None else None, root.right.key if root.right is not None else None)) trace_tree(root.left) trace_tree(root.right)

def get_mean_var_std(arr): import numpy as np #求均值 arr_mean = np.mean(arr) #求方差 arr_var = np.var(arr) #求标准差 arr_std = np.std(arr,ddof=1) print("平均值为:%f" % arr_mean) print("方差为:%f" % arr_var) print("标准差为:%f" % arr_std)
return arr_mean ,arr_var ,arr_std
class CHNode(object): def __init__(self, host_name, id=None, vhost=None): self._id = id self.host_name = host_name if vhost: self._hash_host_name = "{}#{}".format(host_name, vhost) else: self._hash_host_name = host_name
def get_id(self, ch_size): if self._id: return self._id return int(md5(self._hash_host_name.encode("utf-8")).hexdigest(), 16) % ch_size

class ConsistHash(object): def __init__(self, size=hash_size): self.size = size # set consistent hash circul size self.rbt = RBTree() # red black tree
def insert_host(self, host): host_id = host.get_id(self.size) self.rbt.insert(host_id, host)
def remove_host(self, host): host_id = host.get_id(self.size) self.rbt.remove(host_id)
@staticmethod def _find_upper(root, elem): if root is None: return -1
if elem == root.key: return root.key elif elem < root.key: maybe_max = find_upper(root.left, elem) if _not_exists(maybe_max): return root.key return maybe_max else: maybe_max = find_upper(root.right, elem) if _not_exists(maybe_max): return -1 return maybe_max
def find_host(self, id): id %= self.size idx = self._find_upper(self.rbt._root, id) if idx == -1: # id larger than max id # assert tree is not empty return self.rbt.min_item()[1] return self.rbt.get_value(idx)

if __name__ == "__main__": rbt = RBTree() for i in range(0, 30, 2): rbt.insert(i, "{}".format(i)) ch = ConsistHash() arr_nodes = [["192.168.0.1"], ["192.168.0.2"], ["192.168.0.3"], ["192.168.0.4"], ["192.168.0.5"], ["192.168.0.6"], ["192.168.0.7"], ["192.168.0.8"], ["192.168.0.9"], ["192.168.0.10"] ] arr_nodes2 = ["192.168.0.1", "192.168.0.2", "192.168.0.3", "192.168.0.4", "192.168.0.5", "192.168.0.6", "192.168.0.7", "192.168.0.8", "192.168.0.9", "192.168.0.10" ]
ch.insert_host(CHNode(arr_nodes[0][0])) ch.insert_host(CHNode(arr_nodes[1][0])) ch.insert_host(CHNode(arr_nodes[2][0])) ch.insert_host(CHNode(arr_nodes[3][0])) ch.insert_host(CHNode(arr_nodes[4][0])) ch.insert_host(CHNode(arr_nodes[5][0])) ch.insert_host(CHNode(arr_nodes[6][0])) ch.insert_host(CHNode(arr_nodes[7][0])) ch.insert_host(CHNode(arr_nodes[8][0])) ch.insert_host(CHNode(arr_nodes[9][0]))
for i in range(1, 1000000): search_elem = random.randint(1, hash_size) nodes = ch.find_host(search_elem).host_name #print("{} in {}".format(search_elem, nodes)) nodes = ch.find_host(search_elem).host_name position = arr_nodes2.index(nodes) arr_nodes[position].append(search_elem) #pprint(arr_nodes)
arr_node_count = [] arr_node_count_dif = []
for i in range(len(arr_nodes)): arr_nodes[i].pop(0) arr_node_count.append(len(arr_nodes[i])) print("Node:{} / Count:{} ".format(arr_nodes2[i], len(arr_nodes[i])))
get_mean_var_std(arr_node_count)
复制代码


用户头像

Croesus

关注

还未添加个人签名 2019.01.05 加入

还未添加个人简介

评论

发布
暂无评论
Week 5 作业01