写点什么

架构师训练营 第五周 作业

用户头像
一雄
关注
发布于: 2020 年 07 月 08 日
  1. 用你熟悉的编程语言实现一致性hash算法。

  2. 编写测试用例测试这个算法,测试100wkv数据,10个服务器结点的情况下,计算这些kv数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。



hash实现源码如下:

type serverNode struct {
ipAddress string
port int
name string
}
func (n *serverNode) String() string {
return fmt.Sprintf("%s:(%s:%d)", n.name, n.ipAddress, n.port)
}
func newServerNode(name, ipAddress string, port int) *serverNode {
return &serverNode{
name: name,
ipAddress: ipAddress,
port: port,
}
}
var hashFunc = getHashFunc()
func getHashFunc() func(s string) uint32 {
hashHelper := fnv.New32a()
return func(s string) uint32 {
hashHelper.Reset()
hashHelper.Write([]byte(s))
return hashHelper.Sum32()
}
}
type consistentHashingEnv struct {
nodeMap map[uint32]*serverNode
hashValues []uint32
virtualCount int
}
func newEnv(nodes []*serverNode, virtualCount int) *consistentHashingEnv {
result := &consistentHashingEnv{
nodeMap: make(map[uint32]*serverNode),
virtualCount: virtualCount,
}
for i := 0; i < len(nodes); i++ {
for j := 0; j < result.virtualCount; j++ {
hash := hashFunc(fmt.Sprintf("%s%d", nodes[i], j))
result.nodeMap[hash] = nodes[i]
result.hashValues = append(result.hashValues, hash)
}
}
sort.SliceStable(result.hashValues, func(a, b int) bool {
return result.hashValues[a] < result.hashValues[b]
})
return result
}
func (c *consistentHashingEnv) GetNode(key string) *serverNode {
hash := hashFunc(key)
low, high := 0, len(c.hashValues)-1
for low <= high {
mid := low + (high-low)/2
midVal := c.hashValues[mid]
if midVal >= hash {
if mid == 0 || c.hashValues[mid-1] <= hash {
return c.nodeMap[midVal]
}
high = mid - 1
} else {
low = mid + 1
}
}
return c.nodeMap[c.hashValues[0]]
}
func getVariance(data []float64) float64 {
var variance float64
sum := func(data []float64) float64 {
var result float64
for i := 0; i < len(data); i++ {
result += data[i]
}
return result
}
expect := sum(data) / float64(len(data))
for i := 0; i < len(data); i++ {
variance += math.Pow(data[i]-expect, 2)
}
return math.Sqrt(variance / float64(len(data)))
}



测试用例源码如下:

func TestWeek5(t *testing.T) {
nodes := make([]*serverNode, 10)
serverKeyMap := make(map[*serverNode][]string)
// generates keys
keys := make([]string, 1000000)
for i := 0; i < len(keys); i++ {
keys[i] = fmt.Sprint("key", i*17, "ss", i*19)
}
// generates nodes
for i := 0; i < len(nodes); i++ {
nodes[i] = newServerNode(fmt.Sprint("mynode", i), fmt.Sprint("10.1.33.2", i), 80)
}
ce := newEnv(nodes, 160)
// make results
for i := 0; i < len(keys); i++ {
n := ce.GetNode(keys[i])
if _, found := serverKeyMap[n]; !found {
serverKeyMap[n] = make([]string, 0)
}
serverKeyMap[n] = append(serverKeyMap[n], keys[i])
}
// calculate result
load := make([]float64, len(nodes))
i := 0
for _, eachKeys := range serverKeyMap {
load[i] = float64(len(eachKeys))
i++
}
fmt.Printf("nodes: %d,virtual count: %d, variance %v", len(nodes), ce.virtualCount, getVariance(load))
}



测试结果如下:



nodes: 10,virtual count: 50, variance 71427.23109571027
nodes: 10,virtual count: 100, variance 72017.02570920296
nodes: 10,virtual count: 150, variance 29348.974367088194
nodes: 10,virtual count: 160, variance 29169.9588515308
nodes: 10,virtual count: 250, variance 22715.88457885803
nodes: 10,virtual count: 450, variance 15258.98646699708



分析如上数据,150-160左右的虚拟结点数的收益最大。

发布于: 2020 年 07 月 08 日阅读数: 45
用户头像

一雄

关注

还未添加个人签名 2020.03.05 加入

还未添加个人简介

评论

发布
暂无评论
架构师训练营 第五周 作业