第五周作业

用户头像
倪惠华
关注
发布于: 2020 年 07 月 08 日



  • 用你熟悉的编程语言实现一致性 hash 算法。

  • 编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。



package tests
import (
"errors"
"fmt"
"hash/crc32"
"math"
"sort"
"strconv"
"testing"
)
type Consistent struct {
nodesReplicas int //添加虚拟节点数量
hashSortNodes []uint32 //所有节点的排序数组
circle map[uint32]string //所有节点对应的node
nodes map[string]bool //node是否存在
}
func NewConsistent() *Consistent {
return &Consistent{
nodesReplicas: 200,
circle: make(map[uint32]string),
nodes: make(map[string]bool),
}
}
func (c *Consistent) AddNode(node string) error {
if _, ok := c.nodes[node]; ok { //判断新加节点是否存在
return fmt.Errorf("%s already existed", node)
}
c.nodes[node] = true
for i := 0; i < c.nodesReplicas; i++ { //添加虚拟节点
replicasKey := getReplicasKey(i, node) //虚拟节点
c.circle[replicasKey] = node
c.hashSortNodes = append(c.hashSortNodes, replicasKey) //所有节点ID
}
sort.Slice(c.hashSortNodes, func(i, j int) bool { //排序
return c.hashSortNodes[i] < c.hashSortNodes[j]
})
return nil
}
func (c *Consistent) GetNode(key string) (string, error) {
if len(c.nodes) == 0 {
return "", errors.New("not add node")
}
index := c.searchNearbyIndex(key)
host := c.circle[c.hashSortNodes[index]]
return host, nil
}
func (c *Consistent) searchNearbyIndex(key string) int {
hashKey := hashKey(key)
index := sort.Search(len(c.hashSortNodes), func(i int) bool { //key算出的节点,距离最近的node节点下标 sort.Search数组需要升序排列好
return c.hashSortNodes[i] >= hashKey
})
if index >= len(c.hashSortNodes) {
index = 0
}
return index
}
func getReplicasKey(i int, node string) uint32 {
return hashKey(fmt.Sprintf("%s#%d", node, i))
}
func hashKey(host string) uint32 {
return crc32.ChecksumIEEE([]byte(host))
}
func calSTD(dataDistributeMap map[string]int) float64 {
avg := 100000
var totalValue float64 = 0
for _, value := range dataDistributeMap {
totalValue = totalValue + math.Pow(math.Abs(float64(avg-value)), 2)
}
return math.Sqrt(totalValue / 10)
}
func TestAddData(t *testing.T) {
for nodesReplicas := 100; nodesReplicas <= 200; nodesReplicas++ {
consistent := NewConsistent()
consistent.nodesReplicas = nodesReplicas
for i := 0; i < 10; i++ {
consistent.AddNode("Node_" + strconv.Itoa(i))
}
dataDistributeMap := make(map[string]int)
for num := 0; num < 1000000; num++ {
key := fmt.Sprintf("test_key_%d", num)
node, _ := consistent.GetNode(key)
count := dataDistributeMap[node]
if count == 0 {
dataDistributeMap[node] = 1
} else {
dataDistributeMap[node] = count + 1
}
}
fmt.Printf("nodesReplicas=%v,std=%v\n", nodesReplicas, calSTD(dataDistributeMap))
consistent = nil
}
}



//测试用例结果,虚拟节点与标准差数据。这里比较关键的是散列函数是否可以把数据均匀的分散开
nodesReplicas=100,std=18637.422590047154
nodesReplicas=101,std=18336.45375747448
nodesReplicas=102,std=18183.738702478102
nodesReplicas=103,std=17865.044438791636
nodesReplicas=104,std=17723.902911040783
nodesReplicas=105,std=17508.814231694847
nodesReplicas=106,std=17244.224812962744
nodesReplicas=107,std=17411.35927491016
nodesReplicas=108,std=17552.303034075045
nodesReplicas=109,std=16764.82830809788
nodesReplicas=110,std=16520.800918841676
nodesReplicas=111,std=16218.94971322126
nodesReplicas=112,std=15999.956606191156
nodesReplicas=113,std=16317.221595602605
nodesReplicas=114,std=16725.957808149582
nodesReplicas=115,std=16635.338902469044
nodesReplicas=116,std=16729.724976818954
nodesReplicas=117,std=16284.605263868081
nodesReplicas=118,std=16238.897388677595
nodesReplicas=119,std=15772.494190837415
nodesReplicas=120,std=15352.573829817591
nodesReplicas=121,std=14919.7271422771
nodesReplicas=122,std=14904.551016384225
nodesReplicas=123,std=14875.371988626033
nodesReplicas=124,std=14932.096456961426
nodesReplicas=125,std=14966.524372745998
nodesReplicas=126,std=15324.912384741388
nodesReplicas=127,std=15977.175532615269
nodesReplicas=128,std=16049.002666832603
nodesReplicas=129,std=14840.80927038684
nodesReplicas=130,std=13749.909665157804
nodesReplicas=131,std=13691.367995930867
nodesReplicas=132,std=13762.821229675259
nodesReplicas=133,std=13907.914854499217
nodesReplicas=134,std=13517.73202870955
nodesReplicas=135,std=13590.586116867808
nodesReplicas=136,std=13925.114484269061
nodesReplicas=137,std=14399.252508376954
nodesReplicas=138,std=14444.596373730905
nodesReplicas=139,std=14218.760733622323
nodesReplicas=140,std=13931.093532095749
nodesReplicas=141,std=14141.265869786905
nodesReplicas=142,std=14510.716612214574
nodesReplicas=143,std=14847.132167526495
nodesReplicas=144,std=14938.868538145718
nodesReplicas=145,std=15491.923708823253
nodesReplicas=146,std=15959.540381853107
nodesReplicas=147,std=16162.299100066179
nodesReplicas=148,std=16184.426959271681
nodesReplicas=149,std=15832.311170514557
nodesReplicas=150,std=15647.809987343277
nodesReplicas=151,std=15789.255289594883
nodesReplicas=152,std=16171.53785513301
nodesReplicas=153,std=16341.903365275417
nodesReplicas=154,std=16532.26842874262
nodesReplicas=155,std=16774.0822401704
nodesReplicas=156,std=17103.775817052796
nodesReplicas=157,std=17533.222846926914
nodesReplicas=158,std=18121.72146348133
nodesReplicas=159,std=18015.368661229222
nodesReplicas=160,std=17677.801362160397
nodesReplicas=161,std=17691.103911288294
nodesReplicas=162,std=17915.48197509629
nodesReplicas=163,std=18381.738524960037
nodesReplicas=164,std=18474.200469844425
nodesReplicas=165,std=18035.491354548674
nodesReplicas=166,std=18185.099829255818
nodesReplicas=167,std=18212.419515264854
nodesReplicas=168,std=18143.330581786795
nodesReplicas=169,std=18255.693112013032
nodesReplicas=170,std=18472.942185802454
nodesReplicas=171,std=18520.654275699875
nodesReplicas=172,std=18612.134138781614
nodesReplicas=173,std=19346.22619013848
nodesReplicas=174,std=19317.97076299682
nodesReplicas=175,std=19339.8126723089
nodesReplicas=176,std=19394.424956672472
nodesReplicas=177,std=19446.01431656369
nodesReplicas=178,std=19081.96204272506
nodesReplicas=179,std=19225.92947037932
nodesReplicas=180,std=18931.6345675697
nodesReplicas=181,std=19091.27366625915
nodesReplicas=182,std=19109.542009163903
nodesReplicas=183,std=19296.986883967144
nodesReplicas=184,std=19628.85686432096
nodesReplicas=185,std=19611.381353693574
nodesReplicas=186,std=19639.257592892864
nodesReplicas=187,std=19618.200880814733
nodesReplicas=188,std=19762.10710425384
nodesReplicas=189,std=19914.156432046024
nodesReplicas=190,std=19986.262401960004
nodesReplicas=191,std=20223.049038164347
nodesReplicas=192,std=20460.3679487931
nodesReplicas=193,std=20661.062591260885
nodesReplicas=194,std=21146.09271236651
nodesReplicas=195,std=21306.66155454674
nodesReplicas=196,std=21309.25824143112
nodesReplicas=197,std=21638.773990224123
nodesReplicas=198,std=21904.441814390066
nodesReplicas=199,std=21973.870123398836
nodesReplicas=200,std=21764.37291079162



用户头像

倪惠华

关注

还未添加个人签名 2018.05.08 加入

还未添加个人简介

评论 (1 条评论)

发布
用户头像
标准差比较大,可以再加大虚拟节点到400左右,或者更换哈希方法
2020 年 07 月 13 日 22:27
回复
没有更多了
第五周作业