架构师训练营第 5 周作业

用户头像
aoeiuvzcs
关注
发布于: 2020 年 07 月 09 日
  1. 用你熟悉的编程语言实现一致性hash算法



package main
import (
"crypto/md5"
"fmt"
"math"
"sort"
"sync"
)
const defaultVirtualSpots = 400
type Node struct {
ip string
}
type virtualNode struct {
node *Node
spotValue uint32
}
type HashRing struct {
spots int
vNodes []virtualNode
nodes []Node
mu sync.RWMutex
}
func NewHashRing(spots int) *HashRing {
if spots == 0 {
spots = defaultVirtualSpots
}
return &HashRing{
spots: spots,
vNodes: make([]virtualNode, 0),
nodes: make([]Node, 0),
}
}
func (h *HashRing)AddNode(n Node) {
h.mu.Lock()
defer h.mu.Unlock()
h.nodes = append(h.nodes, n)
h.generate()
}
func (h *HashRing)RemoveNode(ip string) {
h.mu.Lock()
defer h.mu.Unlock()
removed := false
for i := range h.nodes {
if h.nodes[i].ip == ip {
removed = true
h.nodes = append(h.nodes[:i], h.nodes[i+1:]...)
break
}
}
if removed {
h.generate()
}
}
func (h *HashRing)generate() {
var vNodes []virtualNode
for i, n := range h.nodes {
for j := 1; j <= h.spots; j++ {
data := fmt.Sprintf("%s-%d", n.ip, j)
value := md5hash([]byte(data))
vn := virtualNode {
node: &h.nodes[i],
spotValue: value,
}
vNodes = append(vNodes, vn)
}
}
sort.Slice(vNodes, func(i, j int) bool {return vNodes[i].spotValue < vNodes[j].spotValue })
h.vNodes = vNodes
}
func (h *HashRing)GetNode(key string) Node {
h.mu.RLock()
defer h.mu.RUnlock()
value := md5hash([]byte(key))
i := sort.Search(len(h.vNodes), func(i int)bool {return h.vNodes[i].spotValue >= value })
var n Node
if i < len(h.vNodes) {
n = *h.vNodes[i].node
} else {
n = *h.vNodes[0].node
}
return n
}
func md5hash(data []byte) uint32 {
h := md5.New()
h.Write(data)
r := h.Sum(nil)
return (uint32(r[3]) << 24) | (uint32(r[2]) << 16) | (uint32(r[1]) << 8) | uint32(r[0])
}



  1. 编写测试用例测试这个算法,测试100万KV数据,10个服务器节点的情况下,计算这些KV数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性



func test(spots int) {
nodeNumber := 10
hr := NewHashRing(spots)
hr.AddNode(Node{"127.0.0.1"})
hr.AddNode(Node{"127.0.0.2"})
hr.AddNode(Node{"127.0.0.3"})
hr.AddNode(Node{"127.0.0.4"})
hr.AddNode(Node{"127.0.0.5"})
hr.AddNode(Node{"127.0.0.6"})
hr.AddNode(Node{"127.0.0.7"})
hr.AddNode(Node{"127.0.0.8"})
hr.AddNode(Node{"127.0.0.9"})
hr.AddNode(Node{"127.0.0.10"})
counter := make(map[string]int)
for i := 1; i <= 1000000; i++ {
key := fmt.Sprintf("test-key-%d", i)
n := hr.GetNode(key)
counter[n.ip] +=1
}
var total int
for _, num := range counter {
total += num
}
avg := total/nodeNumber
for _, num := range counter {
total += (num-avg) * (num-avg)
}
variance := total/nodeNumber
sd := math.Sqrt(float64(variance))
fmt.Printf("virtualnodes %d, standard deviation %v \n", spots, sd)
}
func main() {
test(10)
test(50)
test(100)
test(500)
test(1000)
test(2000)
}



输出结果如下,

virtualnodes 10, standard deviation 25382.104640868536
virtualnodes 50, standard deviation 12820.482323220136
virtualnodes 100, standard deviation 10258.676132913057
virtualnodes 500, standard deviation 2474.0501207534176
virtualnodes 1000, standard deviation 2845.1342674819407
virtualnodes 2000, standard deviation 1385.7553896701972



可见随着虚拟节点数的增加,均衡性会比价好



用户头像

aoeiuvzcs

关注

还未添加个人签名 2018.03.25 加入

还未添加个人简介

评论

发布
暂无评论
架构师训练营第5周作业