架构师训练营第 5 周作业
发布于: 2020 年 07 月 09 日
用你熟悉的编程语言实现一致性hash算法
package mainimport ( "crypto/md5" "fmt" "math" "sort" "sync")const defaultVirtualSpots = 400type Node struct { ip string}type virtualNode struct { node *Node spotValue uint32}type HashRing struct { spots int vNodes []virtualNode nodes []Node mu sync.RWMutex}func NewHashRing(spots int) *HashRing { if spots == 0 { spots = defaultVirtualSpots } return &HashRing{ spots: spots, vNodes: make([]virtualNode, 0), nodes: make([]Node, 0), }}func (h *HashRing)AddNode(n Node) { h.mu.Lock() defer h.mu.Unlock() h.nodes = append(h.nodes, n) h.generate()}func (h *HashRing)RemoveNode(ip string) { h.mu.Lock() defer h.mu.Unlock() removed := false for i := range h.nodes { if h.nodes[i].ip == ip { removed = true h.nodes = append(h.nodes[:i], h.nodes[i+1:]...) break } } if removed { h.generate() }}func (h *HashRing)generate() { var vNodes []virtualNode for i, n := range h.nodes { for j := 1; j <= h.spots; j++ { data := fmt.Sprintf("%s-%d", n.ip, j) value := md5hash([]byte(data)) vn := virtualNode { node: &h.nodes[i], spotValue: value, } vNodes = append(vNodes, vn) } } sort.Slice(vNodes, func(i, j int) bool {return vNodes[i].spotValue < vNodes[j].spotValue }) h.vNodes = vNodes}func (h *HashRing)GetNode(key string) Node { h.mu.RLock() defer h.mu.RUnlock() value := md5hash([]byte(key)) i := sort.Search(len(h.vNodes), func(i int)bool {return h.vNodes[i].spotValue >= value }) var n Node if i < len(h.vNodes) { n = *h.vNodes[i].node } else { n = *h.vNodes[0].node } return n}func md5hash(data []byte) uint32 { h := md5.New() h.Write(data) r := h.Sum(nil) return (uint32(r[3]) << 24) | (uint32(r[2]) << 16) | (uint32(r[1]) << 8) | uint32(r[0])}
编写测试用例测试这个算法,测试100万KV数据,10个服务器节点的情况下,计算这些KV数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性
func test(spots int) { nodeNumber := 10 hr := NewHashRing(spots) hr.AddNode(Node{"127.0.0.1"}) hr.AddNode(Node{"127.0.0.2"}) hr.AddNode(Node{"127.0.0.3"}) hr.AddNode(Node{"127.0.0.4"}) hr.AddNode(Node{"127.0.0.5"}) hr.AddNode(Node{"127.0.0.6"}) hr.AddNode(Node{"127.0.0.7"}) hr.AddNode(Node{"127.0.0.8"}) hr.AddNode(Node{"127.0.0.9"}) hr.AddNode(Node{"127.0.0.10"}) counter := make(map[string]int) for i := 1; i <= 1000000; i++ { key := fmt.Sprintf("test-key-%d", i) n := hr.GetNode(key) counter[n.ip] +=1 } var total int for _, num := range counter { total += num } avg := total/nodeNumber for _, num := range counter { total += (num-avg) * (num-avg) } variance := total/nodeNumber sd := math.Sqrt(float64(variance)) fmt.Printf("virtualnodes %d, standard deviation %v \n", spots, sd)}func main() { test(10) test(50) test(100) test(500) test(1000) test(2000)}
输出结果如下,
virtualnodes 10, standard deviation 25382.104640868536virtualnodes 50, standard deviation 12820.482323220136virtualnodes 100, standard deviation 10258.676132913057virtualnodes 500, standard deviation 2474.0501207534176virtualnodes 1000, standard deviation 2845.1342674819407virtualnodes 2000, standard deviation 1385.7553896701972
可见随着虚拟节点数的增加,均衡性会比价好
划线
评论
复制
发布于: 2020 年 07 月 09 日阅读数: 75
aoeiuvzcs
关注
还未添加个人签名 2018.03.25 加入
还未添加个人简介
评论