Architect Training Camp: Week 5 Assignment
Published on: July 8, 2020
Implement the consistent hashing algorithm in a programming language you are familiar with.
Write a test case for the algorithm: with 1,000,000 key-value entries and 10 server nodes, compute the standard deviation of the number of entries that land on each server, in order to evaluate how unevenly the algorithm distributes the storage load.
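As background, the imbalance measure computed in the code below is the population standard deviation of the per-node key counts (the helper is named getVariance, but it takes a square root, so it returns the standard deviation the assignment asks for):

\sigma = \sqrt{\frac{1}{n}\sum_{i=1}^{n}\left(x_i - \bar{x}\right)^2},\qquad \bar{x} = \frac{1}{n}\sum_{i=1}^{n} x_i

where x_i is the number of keys stored on server i and n is the number of servers (10 here).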
The source code of the consistent hash implementation is as follows:
package week5 // package name is illustrative

import (
	"fmt"
	"hash/fnv"
	"math"
	"sort"
)

// serverNode represents a physical server on the hash ring.
type serverNode struct {
	ipAddress string
	port      int
	name      string
}

func (n *serverNode) String() string {
	return fmt.Sprintf("%s:(%s:%d)", n.name, n.ipAddress, n.port)
}

func newServerNode(name, ipAddress string, port int) *serverNode {
	return &serverNode{
		name:      name,
		ipAddress: ipAddress,
		port:      port,
	}
}

var hashFunc = getHashFunc()

// getHashFunc returns a closure that hashes a string with 32-bit FNV-1a.
func getHashFunc() func(s string) uint32 {
	hashHelper := fnv.New32a()
	return func(s string) uint32 {
		hashHelper.Reset()
		hashHelper.Write([]byte(s))
		return hashHelper.Sum32()
	}
}

// consistentHashingEnv holds the hash ring: a sorted slice of virtual-node
// hashes plus a map from each hash back to its physical node.
type consistentHashingEnv struct {
	nodeMap      map[uint32]*serverNode
	hashValues   []uint32
	virtualCount int
}

// newEnv builds the ring, creating virtualCount virtual nodes per physical node.
func newEnv(nodes []*serverNode, virtualCount int) *consistentHashingEnv {
	result := &consistentHashingEnv{
		nodeMap:      make(map[uint32]*serverNode),
		virtualCount: virtualCount,
	}
	for i := 0; i < len(nodes); i++ {
		for j := 0; j < result.virtualCount; j++ {
			// Each virtual node is hashed from "nodeString + index".
			hash := hashFunc(fmt.Sprintf("%s%d", nodes[i], j))
			result.nodeMap[hash] = nodes[i]
			result.hashValues = append(result.hashValues, hash)
		}
	}
	sort.SliceStable(result.hashValues, func(a, b int) bool {
		return result.hashValues[a] < result.hashValues[b]
	})
	return result
}

// GetNode binary-searches the sorted ring for the first virtual node whose
// hash is >= the key's hash; if none exists, it wraps around to the first one.
func (c *consistentHashingEnv) GetNode(key string) *serverNode {
	hash := hashFunc(key)
	low, high := 0, len(c.hashValues)-1
	for low <= high {
		mid := low + (high-low)/2
		midVal := c.hashValues[mid]
		if midVal >= hash {
			if mid == 0 || c.hashValues[mid-1] <= hash {
				return c.nodeMap[midVal]
			}
			high = mid - 1
		} else {
			low = mid + 1
		}
	}
	// The key's hash is larger than every virtual node: wrap to the ring start.
	return c.nodeMap[c.hashValues[0]]
}

// getVariance returns the population standard deviation of data
// (the square root of the variance, despite the function name).
func getVariance(data []float64) float64 {
	var variance float64
	sum := func(data []float64) float64 {
		var result float64
		for i := 0; i < len(data); i++ {
			result += data[i]
		}
		return result
	}
	expect := sum(data) / float64(len(data))
	for i := 0; i < len(data); i++ {
		variance += math.Pow(data[i]-expect, 2)
	}
	return math.Sqrt(variance / float64(len(data)))
}
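As a side note, the hand-rolled binary search in GetNode can also be written with sort.Search from the standard library, which returns the smallest index for which the predicate holds. A minimal sketch assuming the same consistentHashingEnv as above (the method name getNodeWithSortSearch is illustrative, not part of the original code):

// Alternative ring lookup using sort.Search instead of a manual binary search.
func (c *consistentHashingEnv) getNodeWithSortSearch(key string) *serverNode {
	hash := hashFunc(key)
	// Index of the first virtual node clockwise from the key's hash.
	idx := sort.Search(len(c.hashValues), func(i int) bool {
		return c.hashValues[i] >= hash
	})
	if idx == len(c.hashValues) {
		idx = 0 // hash is past the last virtual node: wrap around the ring
	}
	return c.nodeMap[c.hashValues[idx]]
}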
The source code of the test case is as follows:
package week5 // same package, in a *_test.go file

import (
	"fmt"
	"testing"
)

func TestWeek5(t *testing.T) {
	nodes := make([]*serverNode, 10)
	serverKeyMap := make(map[*serverNode][]string)

	// Generate 1,000,000 keys.
	keys := make([]string, 1000000)
	for i := 0; i < len(keys); i++ {
		keys[i] = fmt.Sprint("key", i*17, "ss", i*19)
	}

	// Generate 10 server nodes.
	for i := 0; i < len(nodes); i++ {
		nodes[i] = newServerNode(fmt.Sprint("mynode", i), fmt.Sprint("10.1.33.2", i), 80)
	}
	ce := newEnv(nodes, 160)

	// Route every key and record which keys land on which node.
	for i := 0; i < len(keys); i++ {
		n := ce.GetNode(keys[i])
		if _, found := serverKeyMap[n]; !found {
			serverKeyMap[n] = make([]string, 0)
		}
		serverKeyMap[n] = append(serverKeyMap[n], keys[i])
	}

	// Collect the per-node key counts and report their standard deviation.
	load := make([]float64, len(nodes))
	i := 0
	for _, eachKeys := range serverKeyMap {
		load[i] = float64(len(eachKeys))
		i++
	}
	fmt.Printf("nodes: %d, virtual count: %d, variance %v", len(nodes), ce.virtualCount, getVariance(load))
}
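The results below cover virtual-node counts from 50 to 450, so the test above was presumably re-run with different values passed to newEnv. A minimal sketch of such a sweep, assuming it sits next to TestWeek5 in the same test file (the function name TestVirtualCountSweep is illustrative):

// Measure the standard deviation for several virtual-node counts.
// The loop body mirrors TestWeek5; only the virtualCount changes.
func TestVirtualCountSweep(t *testing.T) {
	// Reuse the same key set for every run so the runs are comparable.
	keys := make([]string, 1000000)
	for i := 0; i < len(keys); i++ {
		keys[i] = fmt.Sprint("key", i*17, "ss", i*19)
	}
	for _, virtualCount := range []int{50, 100, 150, 160, 250, 450} {
		nodes := make([]*serverNode, 10)
		for i := 0; i < len(nodes); i++ {
			nodes[i] = newServerNode(fmt.Sprint("mynode", i), fmt.Sprint("10.1.33.2", i), 80)
		}
		ce := newEnv(nodes, virtualCount)

		// Count how many keys each node receives.
		counts := make(map[*serverNode]int)
		for i := 0; i < len(keys); i++ {
			counts[ce.GetNode(keys[i])]++
		}
		load := make([]float64, len(nodes))
		i := 0
		for _, c := range counts {
			load[i] = float64(c)
			i++
		}
		fmt.Printf("nodes: %d, virtual count: %d, variance %v\n", len(nodes), virtualCount, getVariance(load))
	}
}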
The test results are as follows:
nodes: 10, virtual count: 50, variance 71427.23109571027
nodes: 10, virtual count: 100, variance 72017.02570920296
nodes: 10, virtual count: 150, variance 29348.974367088194
nodes: 10, virtual count: 160, variance 29169.9588515308
nodes: 10, virtual count: 250, variance 22715.88457885803
nodes: 10, virtual count: 450, variance 15258.98646699708
Looking at the data above, the standard deviation drops sharply between 100 and about 150 virtual nodes (from roughly 72,000 down to 29,000) and improves only gradually after that, so around 150-160 virtual nodes per physical node gives the biggest return.
Copyright notice: this is an original article by InfoQ author 一雄.
Original link: http://xie.infoq.cn/article/2e0b163cc19a333984f48903e
This article is licensed under CC-BY 4.0; when reposting, please keep the link to the original and this copyright notice.