架构师训练营第 0 期第 5 周作业
发布于: 2020 年 07 月 08 日
作业一:
用你熟悉的编程语言实现一致性 hash 算法。
编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。
测试用例
package mainimport ( "encoding/base64" "fmt" "log" "math/rand" "strconv" "github.com/carbocation/runningvariance" "stathat.com/c/consistent")func main() { // 一致性哈希 c := consistent.New() // 虚拟节点数 c.NumberOfReplicas = 100 // 10 个服务器节点 numberOfServers := 10 for i := 0; i < numberOfServers; i++ { c.Add("Server" + strconv.Itoa(i)) } // 100 万 KV 数据 pairs := map[string]int{} numberOfPairs := 1000000 g := make([]byte, 12) for i := 0; i < numberOfPairs; i++ { _, err := rand.Read(g) if err != nil { log.Fatal(err) } s := base64.StdEncoding.EncodeToString(g) r, err := c.Get(s) if err != nil { log.Fatal(err) } pairs[r] += 1 } // 统计信息 r := runningvariance.NewRunningStat() for k, v := range pairs { fmt.Printf("%s: %d\n", k, v) r.Push(float64(v)) } // 平均值 mean := r.Mean() // 方差 variance := r.Variance() // 标准差 stdev := r.StandardDeviation() fmt.Printf("Mean: %f, Variance: %f, StdDev: %f\n", mean, variance, stdev)}// Output// Server6: 107081// Server9: 93514// Server8: 83825// Server3: 110271// Server5: 116223// Server7: 108069// Server2: 108724// Server1: 84023// Server4: 110884// Server0: 77386// Mean: 100000.000000, Variance: 194317163.333333, StdDev: 13939.769128
一致性哈希,来自:https://github.com/stathat/consistent/blob/master/consistent.go
// Copyright (C) 2012 Numerotron Inc.// Use of this source code is governed by an MIT-style license// that can be found in the LICENSE file.// Package consistent provides a consistent hashing function.//// Consistent hashing is often used to distribute requests to a changing set of servers. For example,// say you have some cache servers cacheA, cacheB, and cacheC. You want to decide which cache server// to use to look up information on a user.//// You could use a typical hash table and hash the user id// to one of cacheA, cacheB, or cacheC. But with a typical hash table, if you add or remove a server,// almost all keys will get remapped to different results, which basically could bring your service// to a grinding halt while the caches get rebuilt.//// With a consistent hash, adding or removing a server drastically reduces the number of keys that// get remapped.//// Read more about consistent hashing on wikipedia: http://en.wikipedia.org/wiki/Consistent_hashing//package consistent // import "stathat.com/c/consistent"import ( "errors" "hash/crc32" "hash/fnv" "sort" "strconv" "sync")type uints []uint32// Len returns the length of the uints array.func (x uints) Len() int { return len(x) }// Less returns true if element i is less than element j.func (x uints) Less(i, j int) bool { return x[i] < x[j] }// Swap exchanges elements i and j.func (x uints) Swap(i, j int) { x[i], x[j] = x[j], x[i] }// ErrEmptyCircle is the error returned when trying to get an element when nothing has been added to hash.var ErrEmptyCircle = errors.New("empty circle")// Consistent holds the information about the members of the consistent hash circle.type Consistent struct { circle map[uint32]string members map[string]bool sortedHashes uints NumberOfReplicas int count int64 scratch [64]byte UseFnv bool sync.RWMutex}// New creates a new Consistent object with a default setting of 20 replicas for each entry.//// To change the number of replicas, set NumberOfReplicas before adding entries.func New() *Consistent { c := new(Consistent) c.NumberOfReplicas = 20 c.circle = make(map[uint32]string) c.members = make(map[string]bool) return c}// eltKey generates a string key for an element with an index.func (c *Consistent) eltKey(elt string, idx int) string { // return elt + "|" + strconv.Itoa(idx) return strconv.Itoa(idx) + elt}// Add inserts a string element in the consistent hash.func (c *Consistent) Add(elt string) { c.Lock() defer c.Unlock() c.add(elt)}// need c.Lock() before callingfunc (c *Consistent) add(elt string) { for i := 0; i < c.NumberOfReplicas; i++ { c.circle[c.hashKey(c.eltKey(elt, i))] = elt } c.members[elt] = true c.updateSortedHashes() c.count++}// Remove removes an element from the hash.func (c *Consistent) Remove(elt string) { c.Lock() defer c.Unlock() c.remove(elt)}// need c.Lock() before callingfunc (c *Consistent) remove(elt string) { for i := 0; i < c.NumberOfReplicas; i++ { delete(c.circle, c.hashKey(c.eltKey(elt, i))) } delete(c.members, elt) c.updateSortedHashes() c.count--}// Set sets all the elements in the hash. If there are existing elements not// present in elts, they will be removed.func (c *Consistent) Set(elts []string) { c.Lock() defer c.Unlock() for k := range c.members { found := false for _, v := range elts { if k == v { found = true break } } if !found { c.remove(k) } } for _, v := range elts { _, exists := c.members[v] if exists { continue } c.add(v) }}func (c *Consistent) Members() []string { c.RLock() defer c.RUnlock() var m []string for k := range c.members { m = append(m, k) } return m}// Get returns an element close to where name hashes to in the circle.func (c *Consistent) Get(name string) (string, error) { c.RLock() defer c.RUnlock() if len(c.circle) == 0 { return "", ErrEmptyCircle } key := c.hashKey(name) i := c.search(key) return c.circle[c.sortedHashes[i]], nil}func (c *Consistent) search(key uint32) (i int) { f := func(x int) bool { return c.sortedHashes[x] > key } i = sort.Search(len(c.sortedHashes), f) if i >= len(c.sortedHashes) { i = 0 } return}// GetTwo returns the two closest distinct elements to the name input in the circle.func (c *Consistent) GetTwo(name string) (string, string, error) { c.RLock() defer c.RUnlock() if len(c.circle) == 0 { return "", "", ErrEmptyCircle } key := c.hashKey(name) i := c.search(key) a := c.circle[c.sortedHashes[i]] if c.count == 1 { return a, "", nil } start := i var b string for i = start + 1; i != start; i++ { if i >= len(c.sortedHashes) { i = 0 } b = c.circle[c.sortedHashes[i]] if b != a { break } } return a, b, nil}// GetN returns the N closest distinct elements to the name input in the circle.func (c *Consistent) GetN(name string, n int) ([]string, error) { c.RLock() defer c.RUnlock() if len(c.circle) == 0 { return nil, ErrEmptyCircle } if c.count < int64(n) { n = int(c.count) } var ( key = c.hashKey(name) i = c.search(key) start = i res = make([]string, 0, n) elem = c.circle[c.sortedHashes[i]] ) res = append(res, elem) if len(res) == n { return res, nil } for i = start + 1; i != start; i++ { if i >= len(c.sortedHashes) { i = 0 } elem = c.circle[c.sortedHashes[i]] if !sliceContainsMember(res, elem) { res = append(res, elem) } if len(res) == n { break } } return res, nil}func (c *Consistent) hashKey(key string) uint32 { if c.UseFnv { return c.hashKeyFnv(key) } return c.hashKeyCRC32(key)}func (c *Consistent) hashKeyCRC32(key string) uint32 { if len(key) < 64 { var scratch [64]byte copy(scratch[:], key) return crc32.ChecksumIEEE(scratch[:len(key)]) } return crc32.ChecksumIEEE([]byte(key))}func (c *Consistent) hashKeyFnv(key string) uint32 { h := fnv.New32a() h.Write([]byte(key)) return h.Sum32()}func (c *Consistent) updateSortedHashes() { hashes := c.sortedHashes[:0] //reallocate if we're holding on to too much (1/4th) if cap(c.sortedHashes)/(c.NumberOfReplicas*4) > len(c.circle) { hashes = nil } for k := range c.circle { hashes = append(hashes, k) } sort.Sort(hashes) c.sortedHashes = hashes}func sliceContainsMember(set []string, member string) bool { for _, m := range set { if m == member { return true } } return false}
标准差,来自:https://github.com/carbocation/runningvariance/blob/master/runningvariance.go
/*runningvariance computes accurate running mean, variance, and standard deviationIt is based on code by John D Cook: http://www.johndcook.com/blog/standard_deviation/*/package runningvarianceimport ( "math")type RunningStat struct { N uint NewM float64 OldM float64 NewS float64 OldS float64}func NewRunningStat() *RunningStat { return &RunningStat{}}func (r *RunningStat) Push(x float64) { r.N++ // See Knuth TAOCP vol 2, 3rd edition, page 232 if r.N == 1 { r.OldM = x r.NewM = x r.OldS = 0.0 } else { r.NewM = r.OldM + (x-r.OldM)/float64(r.N) r.NewS = r.OldS + (x-r.OldM)*(x-r.NewM) // set up for next iteration r.OldM = r.NewM r.OldS = r.NewS }}func (r *RunningStat) NumDataValues() uint { return r.N}func (r *RunningStat) Mean() float64 { return r.NewM}func (r *RunningStat) Variance() float64 { if r.N > 1 { return r.NewS / (float64(r.N) - 1) } return 0.0}func (r *RunningStat) StandardDeviation() float64 { return math.Sqrt(r.Variance())}
作业二:
根据当周学习情况,完成一篇学习总结
本周几个主题
分布式缓存架构
消息队列与异步架构
负载均衡架构
分布式数据库
划线
评论
复制
发布于: 2020 年 07 月 08 日阅读数: 50
无名氏
关注
还未添加个人签名 2017.09.11 加入
还未添加个人简介
评论