架构师训练营第 0 期第 5 周作业

发布于: 23 小时前
架构师训练营第 0 期第5周作业

作业一:

  • 用你熟悉的编程语言实现一致性 hash 算法。

  • 编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。

测试用例

package main
import (
"encoding/base64"
"fmt"
"log"
"math/rand"
"strconv"
"github.com/carbocation/runningvariance"
"stathat.com/c/consistent"
)
func main() {
// 一致性哈希
c := consistent.New()
// 虚拟节点数
c.NumberOfReplicas = 100
// 10 个服务器节点
numberOfServers := 10
for i := 0; i < numberOfServers; i++ {
c.Add("Server" + strconv.Itoa(i))
}
// 100 万 KV 数据
pairs := map[string]int{}
numberOfPairs := 1000000
g := make([]byte, 12)
for i := 0; i < numberOfPairs; i++ {
_, err := rand.Read(g)
if err != nil {
log.Fatal(err)
}
s := base64.StdEncoding.EncodeToString(g)
r, err := c.Get(s)
if err != nil {
log.Fatal(err)
}
pairs[r] += 1
}
// 统计信息
r := runningvariance.NewRunningStat()
for k, v := range pairs {
fmt.Printf("%s: %d\n", k, v)
r.Push(float64(v))
}
// 平均值
mean := r.Mean()
// 方差
variance := r.Variance()
// 标准差
stdev := r.StandardDeviation()
fmt.Printf("Mean: %f, Variance: %f, StdDev: %f\n", mean, variance, stdev)
}
// Output
// Server6: 107081
// Server9: 93514
// Server8: 83825
// Server3: 110271
// Server5: 116223
// Server7: 108069
// Server2: 108724
// Server1: 84023
// Server4: 110884
// Server0: 77386
// Mean: 100000.000000, Variance: 194317163.333333, StdDev: 13939.769128

一致性哈希,来自:https://github.com/stathat/consistent/blob/master/consistent.go

// Copyright (C) 2012 Numerotron Inc.
// Use of this source code is governed by an MIT-style license
// that can be found in the LICENSE file.
// Package consistent provides a consistent hashing function.
//
// Consistent hashing is often used to distribute requests to a changing set of servers. For example,
// say you have some cache servers cacheA, cacheB, and cacheC. You want to decide which cache server
// to use to look up information on a user.
//
// You could use a typical hash table and hash the user id
// to one of cacheA, cacheB, or cacheC. But with a typical hash table, if you add or remove a server,
// almost all keys will get remapped to different results, which basically could bring your service
// to a grinding halt while the caches get rebuilt.
//
// With a consistent hash, adding or removing a server drastically reduces the number of keys that
// get remapped.
//
// Read more about consistent hashing on wikipedia: http://en.wikipedia.org/wiki/Consistent_hashing
//
package consistent // import "stathat.com/c/consistent"
import (
"errors"
"hash/crc32"
"hash/fnv"
"sort"
"strconv"
"sync"
)
type uints []uint32
// Len returns the length of the uints array.
func (x uints) Len() int { return len(x) }
// Less returns true if element i is less than element j.
func (x uints) Less(i, j int) bool { return x[i] < x[j] }
// Swap exchanges elements i and j.
func (x uints) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
// ErrEmptyCircle is the error returned when trying to get an element when nothing has been added to hash.
var ErrEmptyCircle = errors.New("empty circle")
// Consistent holds the information about the members of the consistent hash circle.
type Consistent struct {
circle map[uint32]string
members map[string]bool
sortedHashes uints
NumberOfReplicas int
count int64
scratch [64]byte
UseFnv bool
sync.RWMutex
}
// New creates a new Consistent object with a default setting of 20 replicas for each entry.
//
// To change the number of replicas, set NumberOfReplicas before adding entries.
func New() *Consistent {
c := new(Consistent)
c.NumberOfReplicas = 20
c.circle = make(map[uint32]string)
c.members = make(map[string]bool)
return c
}
// eltKey generates a string key for an element with an index.
func (c *Consistent) eltKey(elt string, idx int) string {
// return elt + "|" + strconv.Itoa(idx)
return strconv.Itoa(idx) + elt
}
// Add inserts a string element in the consistent hash.
func (c *Consistent) Add(elt string) {
c.Lock()
defer c.Unlock()
c.add(elt)
}
// need c.Lock() before calling
func (c *Consistent) add(elt string) {
for i := 0; i < c.NumberOfReplicas; i++ {
c.circle[c.hashKey(c.eltKey(elt, i))] = elt
}
c.members[elt] = true
c.updateSortedHashes()
c.count++
}
// Remove removes an element from the hash.
func (c *Consistent) Remove(elt string) {
c.Lock()
defer c.Unlock()
c.remove(elt)
}
// need c.Lock() before calling
func (c *Consistent) remove(elt string) {
for i := 0; i < c.NumberOfReplicas; i++ {
delete(c.circle, c.hashKey(c.eltKey(elt, i)))
}
delete(c.members, elt)
c.updateSortedHashes()
c.count--
}
// Set sets all the elements in the hash. If there are existing elements not
// present in elts, they will be removed.
func (c *Consistent) Set(elts []string) {
c.Lock()
defer c.Unlock()
for k := range c.members {
found := false
for _, v := range elts {
if k == v {
found = true
break
}
}
if !found {
c.remove(k)
}
}
for _, v := range elts {
_, exists := c.members[v]
if exists {
continue
}
c.add(v)
}
}
func (c *Consistent) Members() []string {
c.RLock()
defer c.RUnlock()
var m []string
for k := range c.members {
m = append(m, k)
}
return m
}
// Get returns an element close to where name hashes to in the circle.
func (c *Consistent) Get(name string) (string, error) {
c.RLock()
defer c.RUnlock()
if len(c.circle) == 0 {
return "", ErrEmptyCircle
}
key := c.hashKey(name)
i := c.search(key)
return c.circle[c.sortedHashes[i]], nil
}
func (c *Consistent) search(key uint32) (i int) {
f := func(x int) bool {
return c.sortedHashes[x] > key
}
i = sort.Search(len(c.sortedHashes), f)
if i >= len(c.sortedHashes) {
i = 0
}
return
}
// GetTwo returns the two closest distinct elements to the name input in the circle.
func (c *Consistent) GetTwo(name string) (string, string, error) {
c.RLock()
defer c.RUnlock()
if len(c.circle) == 0 {
return "", "", ErrEmptyCircle
}
key := c.hashKey(name)
i := c.search(key)
a := c.circle[c.sortedHashes[i]]
if c.count == 1 {
return a, "", nil
}
start := i
var b string
for i = start + 1; i != start; i++ {
if i >= len(c.sortedHashes) {
i = 0
}
b = c.circle[c.sortedHashes[i]]
if b != a {
break
}
}
return a, b, nil
}
// GetN returns the N closest distinct elements to the name input in the circle.
func (c *Consistent) GetN(name string, n int) ([]string, error) {
c.RLock()
defer c.RUnlock()
if len(c.circle) == 0 {
return nil, ErrEmptyCircle
}
if c.count < int64(n) {
n = int(c.count)
}
var (
key = c.hashKey(name)
i = c.search(key)
start = i
res = make([]string, 0, n)
elem = c.circle[c.sortedHashes[i]]
)
res = append(res, elem)
if len(res) == n {
return res, nil
}
for i = start + 1; i != start; i++ {
if i >= len(c.sortedHashes) {
i = 0
}
elem = c.circle[c.sortedHashes[i]]
if !sliceContainsMember(res, elem) {
res = append(res, elem)
}
if len(res) == n {
break
}
}
return res, nil
}
func (c *Consistent) hashKey(key string) uint32 {
if c.UseFnv {
return c.hashKeyFnv(key)
}
return c.hashKeyCRC32(key)
}
func (c *Consistent) hashKeyCRC32(key string) uint32 {
if len(key) < 64 {
var scratch [64]byte
copy(scratch[:], key)
return crc32.ChecksumIEEE(scratch[:len(key)])
}
return crc32.ChecksumIEEE([]byte(key))
}
func (c *Consistent) hashKeyFnv(key string) uint32 {
h := fnv.New32a()
h.Write([]byte(key))
return h.Sum32()
}
func (c *Consistent) updateSortedHashes() {
hashes := c.sortedHashes[:0]
//reallocate if we're holding on to too much (1/4th)
if cap(c.sortedHashes)/(c.NumberOfReplicas*4) > len(c.circle) {
hashes = nil
}
for k := range c.circle {
hashes = append(hashes, k)
}
sort.Sort(hashes)
c.sortedHashes = hashes
}
func sliceContainsMember(set []string, member string) bool {
for _, m := range set {
if m == member {
return true
}
}
return false
}

标准差,来自:https://github.com/carbocation/runningvariance/blob/master/runningvariance.go

/*
runningvariance computes accurate running mean, variance, and standard deviation
It is based on code by John D Cook: http://www.johndcook.com/blog/standard_deviation/
*/
package runningvariance
import (
"math"
)
type RunningStat struct {
N uint
NewM float64
OldM float64
NewS float64
OldS float64
}
func NewRunningStat() *RunningStat {
return &RunningStat{}
}
func (r *RunningStat) Push(x float64) {
r.N++
// See Knuth TAOCP vol 2, 3rd edition, page 232
if r.N == 1 {
r.OldM = x
r.NewM = x
r.OldS = 0.0
} else {
r.NewM = r.OldM + (x-r.OldM)/float64(r.N)
r.NewS = r.OldS + (x-r.OldM)*(x-r.NewM)
// set up for next iteration
r.OldM = r.NewM
r.OldS = r.NewS
}
}
func (r *RunningStat) NumDataValues() uint {
return r.N
}
func (r *RunningStat) Mean() float64 {
return r.NewM
}
func (r *RunningStat) Variance() float64 {
if r.N > 1 {
return r.NewS / (float64(r.N) - 1)
}
return 0.0
}
func (r *RunningStat) StandardDeviation() float64 {
return math.Sqrt(r.Variance())
}

作业二:

根据当周学习情况,完成一篇学习总结

本周几个主题

  1. 分布式缓存架构

  2. 消息队列与异步架构

  3. 负载均衡架构

  4. 分布式数据库

用户头像

无名氏

关注

还未添加个人签名 2017.09.11 加入

还未添加个人简介

评论

发布
暂无评论
架构师训练营第 0 期第5周作业