极客大学架构师训练营 系统架构 一致性哈希 Consistent Hash 第五次作业

用户头像
John(易筋)
关注
发布于: 2020 年 07 月 08 日
  • 用你熟悉的编程语言实现一致性 hash 算法。

  • 编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。

答:

1、首先选用Java的TreeMap,因为它基于红黑树实现,查找效率比较高,而且已经原生提供了ceilingEntry和firstEntry方法;

其次,虚拟结点的Hash计算必须是稳定不变的,否则服务重启可能导致虚拟结点跳变;

还有,过程中用Mock类,代替物理服务器完成缓存的实际读写操作;

如果直接采用String.hashCode,发现计算得到的hash值比较集中,不适合使用,于是去网上另外找了一个hash算法替代。



2、项目的2个关键流程简画如下:





3、类设计图如下:





  1. CacheImplement

package hash;

public class CacheImplement implements Cache {
private ConsistentHashRing ring = new ConsistentHashRing();
private String[] servers;

public CacheImplement(String[] servers, int virtualFactorPerNode) {
ring.addServers(servers, virtualFactorPerNode);
}

@Override
public String get(String key) {
Cache serverKey = ring.getServer(key);
return serverKey.get(key);
}

@Override
public void set(String key, String val) {
Cache serverKey = ring.getServer(key);
serverKey.set(key,val);
}

@Override
public String toString() {
return ring.toString();
}

static String[] ipServers = new String[] {
"192.168.0.1",
"192.168.0.2",
"192.168.0.3",
"192.168.0.4",
"192.168.0.5",
"192.168.0.6",
"192.168.0.7",
"192.168.0.8",
"192.168.0.9",
"192.168.0.10"
};

static int maxCacheNumber = 1_000_000;

public static void main(String[] args) {
testCache(10);
testCache(50);
testCache(100);
testCache(150);
testCache(200);
testCache(500);
testCache(1_000);
testCache(1_500);
testCache(2_000);
}

static void testCache(int virtualFactorPerNode) {
Cache cache = new CacheImplement(ipServers, virtualFactorPerNode);
fillCache(cache, maxCacheNumber);
System.out.println(cache);
}

static void fillCache(Cache cache, int cacheNumber) {
for (int i = 0; i < cacheNumber; i++) {
String key = "key" + i;
String val = "val" + key;
cache.set(key, val);
}
}
}



哈希环类 ConsistentHashRing

package hash;
import java.util.*;
public class ConsistentHashRing {
TreeMap<Integer, Cache> map = new TreeMap<Integer, Cache>();
List<Cache> servers = new ArrayList<>();
final String virtualKeyString = "&&Virtual";
public void addServers(String[] servers, int virtualFactorPerNode) {
if (servers == null) {
return;
}
for (String server: servers) {
Cache cacheServer = new CacheServerMock(server);
map.put(getHashCode(server), cacheServer);
// add virtual nodes
for (int i = 1; i <= virtualFactorPerNode; i++) {
map.put(getVirtualHashCode(server, i), cacheServer);
}
this.servers.add(cacheServer);
}
}
private int getVirtualHashCode(String key, int i) {
key += virtualKeyString + i;
return getHashCode(key);
}
public static int getHashCode(String key) {
// int ret = key.hashCode(); // result is too close, deprecated
final int p = 16777619;
int ret = (int) 2166136261L;
for (int i = 0; i < key.length(); i++) {
ret = (ret ^ key.charAt(i)) * p;
}
ret += ret << 13;
ret ^= ret >> 7;
ret += ret << 3;
ret ^= ret >> 17;
ret += ret << 5;
return ret < 0 ? -ret : ret;
}
/**
* get the server from consistent hash ring
* @param cacheKey
* @return
*/
public Cache getServer(String cacheKey) {
if (map.size() <= 0) {
throw new IllegalArgumentException("You must add server before call this method");
}
if (cacheKey == null || cacheKey.isEmpty()) {
throw new IllegalArgumentException("cacheKey can't be empty");
}
int hash = getHashCode(cacheKey);
Map.Entry<Integer, Cache> server = map.ceilingEntry(hash);
return server == null ? map.firstEntry().getValue() : server.getValue();
}
@Override
public String toString() {
int totalNum = 0;
for (Cache cache : servers) {
CacheServerMock mock = (CacheServerMock) cache;
totalNum += mock.getCaches().size();
}
int avg = totalNum / servers.size();
StringBuilder sb = new StringBuilder();
sb.append("总缓存个数:").append(totalNum)
.append(" 服务器台数:").append(servers.size())
.append(" 每服务器虚拟节点数:").append(map.size()/servers.size() - 1)
.append(" 平均值:").append(avg)
.append("\r\n");
calculateStandardDeviation(avg, sb);
return sb.toString();
}
private void calculateStandardDeviation(int avg, StringBuilder sb) {
// 标准差公式: (每个样本 - 平均值)的平方 / (服务器数 - 1),结果再取平方根
long diff = 0;
for (Cache cache : servers) {
CacheServerMock mock = (CacheServerMock) cache;
int size = mock.getCaches().size();
sb.append("Server-Name:")
.append(mock.getServerName())
.append(" Cache-num: ")
.append(size)
.append("\r\n");
long tmp = size - avg;
diff += (tmp * tmp);
}
diff /= (servers.size() - 1);
sb.append("标准差为: ").append(Math.sqrt(diff)).append("\r\n");
}
}



Cache

package hash;
public interface Cache {
// get cache data by key
String get(String key);
// set cache
void set(String key, String val);
}



CacheServerMoke

package hash;
import java.util.HashMap;
import java.util.Map;
/**
* concrete server class
*/
public class CacheServerMock implements Cache {
private Map<String, String> caches = new HashMap<>();
private String serverName;
public CacheServerMock(String serverName) {
this.serverName = serverName;
}
public String getServerName() {
return serverName;
}
public Map<String, String> getCaches() {
return caches;
}
@Override
public String get(String key) {
return caches.get(key);
}
@Override
public void set(String key, String val) {
caches.put(key, val);
}
}



输出结果:

/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/bin/java "-javaagent:/Applications/IntelliJ IDEA.app/Contents/lib/idea_rt.jar=64424:/Applications/IntelliJ IDEA.app/Contents/bin" -Dfile.encoding=UTF-8 -classpath /Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/deploy.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/cldrdata.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/dnsns.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/jaccess.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/jfxrt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/localedata.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/nashorn.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/sunec.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/sunjce_provider.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/sunpkcs11.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/zipfs.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/javaws.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/jfxswt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/management-agent.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/plugin.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/lib/ant-javafx.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/lib/dt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/lib/javafx-mx.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/lib/jconsole.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/lib/packager.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/lib/sa-jdi.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/lib/tools.jar:/Users/zgpeace/Workspace/Java/awesome-java-leetcode/code/LeetCode/out/production/LeetCode hash.CacheImplement
总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:10 平均值:100000
Server-Name:192.168.0.1 Cache-num: 93989
Server-Name:192.168.0.2 Cache-num: 114372
Server-Name:192.168.0.3 Cache-num: 137352
Server-Name:192.168.0.4 Cache-num: 137654
Server-Name:192.168.0.5 Cache-num: 64539
Server-Name:192.168.0.6 Cache-num: 126010
Server-Name:192.168.0.7 Cache-num: 80109
Server-Name:192.168.0.8 Cache-num: 96205
Server-Name:192.168.0.9 Cache-num: 79377
Server-Name:192.168.0.10 Cache-num: 70393
标准差为: 27287.81011733994
总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:50 平均值:100000
Server-Name:192.168.0.1 Cache-num: 112108
Server-Name:192.168.0.2 Cache-num: 116560
Server-Name:192.168.0.3 Cache-num: 85448
Server-Name:192.168.0.4 Cache-num: 105785
Server-Name:192.168.0.5 Cache-num: 87262
Server-Name:192.168.0.6 Cache-num: 90845
Server-Name:192.168.0.7 Cache-num: 89956
Server-Name:192.168.0.8 Cache-num: 133565
Server-Name:192.168.0.9 Cache-num: 89618
Server-Name:192.168.0.10 Cache-num: 88853
标准差为: 16233.25651248079
总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:100 平均值:100000
Server-Name:192.168.0.1 Cache-num: 91865
Server-Name:192.168.0.2 Cache-num: 117698
Server-Name:192.168.0.3 Cache-num: 87517
Server-Name:192.168.0.4 Cache-num: 92990
Server-Name:192.168.0.5 Cache-num: 90695
Server-Name:192.168.0.6 Cache-num: 88082
Server-Name:192.168.0.7 Cache-num: 108513
Server-Name:192.168.0.8 Cache-num: 114522
Server-Name:192.168.0.9 Cache-num: 109090
Server-Name:192.168.0.10 Cache-num: 99028
标准差为: 11449.97379909666
总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:150 平均值:100000
Server-Name:192.168.0.1 Cache-num: 98153
Server-Name:192.168.0.2 Cache-num: 116658
Server-Name:192.168.0.3 Cache-num: 96638
Server-Name:192.168.0.4 Cache-num: 100941
Server-Name:192.168.0.5 Cache-num: 98168
Server-Name:192.168.0.6 Cache-num: 92365
Server-Name:192.168.0.7 Cache-num: 95321
Server-Name:192.168.0.8 Cache-num: 94687
Server-Name:192.168.0.9 Cache-num: 117472
Server-Name:192.168.0.10 Cache-num: 89597
标准差为: 9535.620640524663
总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:200 平均值:100000
Server-Name:192.168.0.1 Cache-num: 99807
Server-Name:192.168.0.2 Cache-num: 102766
Server-Name:192.168.0.3 Cache-num: 101971
Server-Name:192.168.0.4 Cache-num: 103664
Server-Name:192.168.0.5 Cache-num: 95750
Server-Name:192.168.0.6 Cache-num: 94986
Server-Name:192.168.0.7 Cache-num: 100927
Server-Name:192.168.0.8 Cache-num: 90744
Server-Name:192.168.0.9 Cache-num: 120784
Server-Name:192.168.0.10 Cache-num: 88601
标准差为: 8923.409662231135
总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:500 平均值:100000
Server-Name:192.168.0.1 Cache-num: 107026
Server-Name:192.168.0.2 Cache-num: 100697
Server-Name:192.168.0.3 Cache-num: 98181
Server-Name:192.168.0.4 Cache-num: 101271
Server-Name:192.168.0.5 Cache-num: 96521
Server-Name:192.168.0.6 Cache-num: 97214
Server-Name:192.168.0.7 Cache-num: 99982
Server-Name:192.168.0.8 Cache-num: 93274
Server-Name:192.168.0.9 Cache-num: 106760
Server-Name:192.168.0.10 Cache-num: 99074
标准差为: 4300.3152210041535
总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:1000 平均值:100000
Server-Name:192.168.0.1 Cache-num: 99394
Server-Name:192.168.0.2 Cache-num: 98163
Server-Name:192.168.0.3 Cache-num: 96362
Server-Name:192.168.0.4 Cache-num: 98742
Server-Name:192.168.0.5 Cache-num: 102872
Server-Name:192.168.0.6 Cache-num: 103469
Server-Name:192.168.0.7 Cache-num: 97127
Server-Name:192.168.0.8 Cache-num: 97118
Server-Name:192.168.0.9 Cache-num: 106913
Server-Name:192.168.0.10 Cache-num: 99840
标准差为: 3386.525210300375
总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:1500 平均值:100000
Server-Name:192.168.0.1 Cache-num: 101235
Server-Name:192.168.0.2 Cache-num: 96578
Server-Name:192.168.0.3 Cache-num: 101237
Server-Name:192.168.0.4 Cache-num: 94736
Server-Name:192.168.0.5 Cache-num: 105713
Server-Name:192.168.0.6 Cache-num: 99410
Server-Name:192.168.0.7 Cache-num: 100591
Server-Name:192.168.0.8 Cache-num: 100123
Server-Name:192.168.0.9 Cache-num: 101931
Server-Name:192.168.0.10 Cache-num: 98446
标准差为: 3017.90440537801
总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:2000 平均值:100000
Server-Name:192.168.0.1 Cache-num: 100787
Server-Name:192.168.0.2 Cache-num: 96876
Server-Name:192.168.0.3 Cache-num: 102202
Server-Name:192.168.0.4 Cache-num: 96784
Server-Name:192.168.0.5 Cache-num: 108713
Server-Name:192.168.0.6 Cache-num: 98230
Server-Name:192.168.0.7 Cache-num: 99731
Server-Name:192.168.0.8 Cache-num: 98804
Server-Name:192.168.0.9 Cache-num: 99857
Server-Name:192.168.0.10 Cache-num: 98016
标准差为: 3497.283946150212
Process finished with exit code 0



参考

https://xie.infoq.cn/article/8651e7b5ef05177005b474dfc



发布于: 2020 年 07 月 08 日 阅读数: 43
用户头像

John(易筋)

关注

问渠那得清如许?为有源头活水来 2018.07.17 加入

工作10+年,架构师,曾经阿里巴巴资深无线开发,汇丰银行架构师/专家。开发过日活过亿的淘宝Taobao App,擅长架构、算法、数据结构、设计模式、iOS、Java Spring Boot。易筋为阿里巴巴花名。

评论

发布
暂无评论
极客大学架构师训练营 系统架构 一致性哈希 Consistent Hash 第五次作业