写点什么

架构师训练营第五周命题作业

发布于: 2020 年 10 月 25 日

1. 用你熟悉的编程语言实现一致性 hash 算法。

2. 编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。


字符串生成 hash 值和标准差的计算方法,在网上搜了下写法。其他是自己写的。


程序:

package main
import ( "fmt" "hash/crc32" "log" "math" "sort")
type RealNode struct { Name string VirtualNodes []*VirtualNode}
type VirtualNode struct { RealNode *RealNode Name string Hash int}
type Distribution struct { StdDev float64 Exp float64}
func (node *RealNode) Show() { log.Println("RealNode begin") log.Printf("RealNode: %+v", node) for _, child := range node.VirtualNodes { log.Printf("VirtualNode: %+v", child) } log.Println("RealNode end")}
func (node *RealNode) SetupVirtualNodes(size int) { node.VirtualNodes = make([]*VirtualNode, size) index := 0 for index < size { name := generateVirtualNodeName(node, index) node.VirtualNodes[index] = &VirtualNode{ RealNode: node, Name: name, Hash: Hash(name), } index++ }}
func main() { testOneRound(10, 50) testOneRound(10, 60) testOneRound(10, 70) testOneRound(10, 80) testOneRound(10, 90) testOneRound(10, 100) testOneRound(10, 110) testOneRound(10, 120) testOneRound(10, 130) testOneRound(10, 140) testOneRound(10, 150) testOneRound(10, 160) testOneRound(10, 170) testOneRound(10, 180) testOneRound(10, 190) testOneRound(10, 200) testOneRound(10, 300) testOneRound(10, 400) testOneRound(10, 500) testOneRound(10, 1000) testOneRound(10, 1500) testOneRound(10, 2000)}
func testOneRound(realNodeCount int, virtualNodeCount int) { log.Println("virtualNodeCount: ", virtualNodeCount) realNodes := initRealNodes(realNodeCount, virtualNodeCount) virtualNodes, _ := getSortedVirtualNodes(realNodes) assertNoDuplicateHash(virtualNodes) mockData := generateMockData(10000 * 100) node2Times := hitNodes(mockData, virtualNodes) showDistribution(node2Times) log.Println()}
func generateMockData(count int) []string { data := make([]string, count) n := 0 prefix := "test-string-%d" for n < count { data[n] = fmt.Sprintf(prefix, n) n++ }
return data}
func hitNodes(source []string, virtualNodes []*VirtualNode) map[string]int { node2Times := make(map[string]int, 0) n := 0 for _, str := range source { targetNode := findNode(str, virtualNodes) if v, found := node2Times[targetNode.RealNode.Name]; found { node2Times[targetNode.RealNode.Name] = v + 1 } else { node2Times[targetNode.RealNode.Name] = 1 } n++ }
return node2Times}
func showDistribution(node2Times map[string]int) { hitTimes := make([]float64, 0) for _, v := range node2Times { hitTimes = append(hitTimes, float64(v)) }
distribution := distribution(hitTimes) log.Printf("stddev: %.2f, exp: %.2f", distribution.StdDev, distribution.Exp)}func initRealNodes(realNodeCount int, virtualNodeCount int) []*RealNode { nodes := make([]*RealNode, realNodeCount) index := 0 for index < realNodeCount { node := &RealNode{Name: generateRealNodeName(index)} node.SetupVirtualNodes(virtualNodeCount) nodes[index] = node index++ }
return nodes}
func getSortedVirtualNodes(nodes []*RealNode) ([]*VirtualNode, map[int]*VirtualNode) { virtualNodes := make([]*VirtualNode, 0) hash2VirtualNode := make(map[int]*VirtualNode, 0) for _, node := range nodes { for _, vnode := range node.VirtualNodes { virtualNodes = append(virtualNodes, vnode) hash2VirtualNode[vnode.Hash] = vnode } } sort.Slice(virtualNodes, func(i, j int) bool { return virtualNodes[i].Hash < virtualNodes[j].Hash }) return virtualNodes, hash2VirtualNode}
func assertNoDuplicateHash(nodes []*VirtualNode) { hash := make(map[int]struct{}, 0) for _, node := range nodes { if _, found := hash[node.Hash]; found { panic(fmt.Sprintf("Duplicate hash %d", node.Hash)) return } else { hash[node.Hash] = struct{}{} } }}
func findNode(value string, virtualNodes []*VirtualNode) *VirtualNode { hash := Hash(value) for _, node := range virtualNodes { if hash <= node.Hash { return node } } //value比最后一个节点的hash大,所以要落到环的第一个节点 return virtualNodes[0]}
func generateRealNodeName(index int) string { return fmt.Sprintf("RealNode-%d", index)}
func generateVirtualNodeName(realNode *RealNode, childIndex int) string { return fmt.Sprintf("%s-%d", realNode.Name, childIndex)}
// https://blog.csdn.net/lanyang123456/article/details/79827101?utm_medium=distribute.pc_relevant.none-task-blog-title-9&spm=1001.2101.3001.4242// Hash hashes a string to a unique hashcode.// crc32 returns a uint32, but for our use we need a non negative integer. Here we cast to an integer and invert it if the result is negative.func Hash(s string) int { v := int(crc32.ChecksumIEEE([]byte(s))) if v >= 0 { return v } else { return -v }}
func distribution(dataSource []float64) *Distribution { //数学期望 var sum float64 = 0 for _, v := range dataSource { sum += v } exp := float64(sum) / float64(len(dataSource))
//标准差 var variance float64 for _, v := range dataSource { variance += math.Pow((v - exp), 2) } stddev := math.Sqrt(variance / float64(len(dataSource))) //fmt.Println("stddev:", stddev) //fmt.Println("exp:", exp)
//正态分布公式 //a := 1 / (math.Sqrt(2*math.Pi) * stddev) * math.Pow(math.E, (-math.Pow((exp-exp), 2)/(2*math.Pow(stddev, 2)))) //fmt.Println(a)
return &Distribution{ StdDev: stddev, Exp: exp, }}
复制代码


测试结果:


//虚拟节点数	标准差//50	15438.54//60	16700.24//70	17957.28//80	18598.59//90	17580.13//100	17371.79//110	16866.18//120	14568.59//130	13964.82//140	13772.18//150	15097.51//160	17097.60//170	17416.83//180	18204.47//190	16835.38//200	17621.08//300	18522.42//400	14791.96//500	14673.12//1000	11938.21//1500	13548.18//2000	11840.77
复制代码


发布于: 2020 年 10 月 25 日阅读数: 39
用户头像

还未添加个人签名 2018.07.26 加入

还未添加个人简介

评论

发布
暂无评论
架构师训练营第五周命题作业