极客大学架构师训练营 系统架构 一致性哈希 Consistent Hash 第五次作业
发布于: 2020 年 07 月 08 日
用你熟悉的编程语言实现一致性 hash 算法。
编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。
答:
1、首先选用Java的TreeMap,因为它基于红黑树实现,查找效率比较高,而且已经原生提供了ceilingEntry和firstEntry方法;
其次,虚拟结点的Hash计算必须是稳定不变的,否则服务重启可能导致虚拟结点跳变;
还有,过程中用Mock类,代替物理服务器完成缓存的实际读写操作;
如果直接采用String.hashCode,发现计算得到的hash值比较集中,不适合使用,于是去网上另外找了一个hash算法替代。
2、项目的2个关键流程简画如下:
3、类设计图如下:
CacheImplement
package hash;public class CacheImplement implements Cache { private ConsistentHashRing ring = new ConsistentHashRing(); private String[] servers; public CacheImplement(String[] servers, int virtualFactorPerNode) { ring.addServers(servers, virtualFactorPerNode); } @Override public String get(String key) { Cache serverKey = ring.getServer(key); return serverKey.get(key); } @Override public void set(String key, String val) { Cache serverKey = ring.getServer(key); serverKey.set(key,val); } @Override public String toString() { return ring.toString(); } static String[] ipServers = new String[] { "192.168.0.1", "192.168.0.2", "192.168.0.3", "192.168.0.4", "192.168.0.5", "192.168.0.6", "192.168.0.7", "192.168.0.8", "192.168.0.9", "192.168.0.10" }; static int maxCacheNumber = 1_000_000; public static void main(String[] args) { testCache(10); testCache(50); testCache(100); testCache(150); testCache(200); testCache(500); testCache(1_000); testCache(1_500); testCache(2_000); } static void testCache(int virtualFactorPerNode) { Cache cache = new CacheImplement(ipServers, virtualFactorPerNode); fillCache(cache, maxCacheNumber); System.out.println(cache); } static void fillCache(Cache cache, int cacheNumber) { for (int i = 0; i < cacheNumber; i++) { String key = "key" + i; String val = "val" + key; cache.set(key, val); } }}
哈希环类 ConsistentHashRing
package hash;import java.util.*;public class ConsistentHashRing { TreeMap<Integer, Cache> map = new TreeMap<Integer, Cache>(); List<Cache> servers = new ArrayList<>(); final String virtualKeyString = "&&Virtual"; public void addServers(String[] servers, int virtualFactorPerNode) { if (servers == null) { return; } for (String server: servers) { Cache cacheServer = new CacheServerMock(server); map.put(getHashCode(server), cacheServer); // add virtual nodes for (int i = 1; i <= virtualFactorPerNode; i++) { map.put(getVirtualHashCode(server, i), cacheServer); } this.servers.add(cacheServer); } } private int getVirtualHashCode(String key, int i) { key += virtualKeyString + i; return getHashCode(key); } public static int getHashCode(String key) { // int ret = key.hashCode(); // result is too close, deprecated final int p = 16777619; int ret = (int) 2166136261L; for (int i = 0; i < key.length(); i++) { ret = (ret ^ key.charAt(i)) * p; } ret += ret << 13; ret ^= ret >> 7; ret += ret << 3; ret ^= ret >> 17; ret += ret << 5; return ret < 0 ? -ret : ret; } /** * get the server from consistent hash ring * @param cacheKey * @return */ public Cache getServer(String cacheKey) { if (map.size() <= 0) { throw new IllegalArgumentException("You must add server before call this method"); } if (cacheKey == null || cacheKey.isEmpty()) { throw new IllegalArgumentException("cacheKey can't be empty"); } int hash = getHashCode(cacheKey); Map.Entry<Integer, Cache> server = map.ceilingEntry(hash); return server == null ? map.firstEntry().getValue() : server.getValue(); } @Override public String toString() { int totalNum = 0; for (Cache cache : servers) { CacheServerMock mock = (CacheServerMock) cache; totalNum += mock.getCaches().size(); } int avg = totalNum / servers.size(); StringBuilder sb = new StringBuilder(); sb.append("总缓存个数:").append(totalNum) .append(" 服务器台数:").append(servers.size()) .append(" 每服务器虚拟节点数:").append(map.size()/servers.size() - 1) .append(" 平均值:").append(avg) .append("\r\n"); calculateStandardDeviation(avg, sb); return sb.toString(); } private void calculateStandardDeviation(int avg, StringBuilder sb) { // 标准差公式: (每个样本 - 平均值)的平方 / (服务器数 - 1),结果再取平方根 long diff = 0; for (Cache cache : servers) { CacheServerMock mock = (CacheServerMock) cache; int size = mock.getCaches().size(); sb.append("Server-Name:") .append(mock.getServerName()) .append(" Cache-num: ") .append(size) .append("\r\n"); long tmp = size - avg; diff += (tmp * tmp); } diff /= (servers.size() - 1); sb.append("标准差为: ").append(Math.sqrt(diff)).append("\r\n"); }}
Cache
package hash;public interface Cache { // get cache data by key String get(String key); // set cache void set(String key, String val);}
CacheServerMoke
package hash;import java.util.HashMap;import java.util.Map;/** * concrete server class */public class CacheServerMock implements Cache { private Map<String, String> caches = new HashMap<>(); private String serverName; public CacheServerMock(String serverName) { this.serverName = serverName; } public String getServerName() { return serverName; } public Map<String, String> getCaches() { return caches; } @Override public String get(String key) { return caches.get(key); } @Override public void set(String key, String val) { caches.put(key, val); }}
输出结果:
/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/bin/java "-javaagent:/Applications/IntelliJ IDEA.app/Contents/lib/idea_rt.jar=64424:/Applications/IntelliJ IDEA.app/Contents/bin" -Dfile.encoding=UTF-8 -classpath /Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/deploy.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/cldrdata.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/dnsns.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/jaccess.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/jfxrt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/localedata.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/nashorn.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/sunec.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/sunjce_provider.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/sunpkcs11.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/ext/zipfs.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/javaws.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/jfxswt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/management-agent.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/plugin.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/lib/ant-javafx.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/lib/dt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/lib/javafx-mx.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/lib/jconsole.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/lib/packager.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/lib/sa-jdi.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_172.jdk/Contents/Home/lib/tools.jar:/Users/zgpeace/Workspace/Java/awesome-java-leetcode/code/LeetCode/out/production/LeetCode hash.CacheImplement总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:10 平均值:100000Server-Name:192.168.0.1 Cache-num: 93989Server-Name:192.168.0.2 Cache-num: 114372Server-Name:192.168.0.3 Cache-num: 137352Server-Name:192.168.0.4 Cache-num: 137654Server-Name:192.168.0.5 Cache-num: 64539Server-Name:192.168.0.6 Cache-num: 126010Server-Name:192.168.0.7 Cache-num: 80109Server-Name:192.168.0.8 Cache-num: 96205Server-Name:192.168.0.9 Cache-num: 79377Server-Name:192.168.0.10 Cache-num: 70393标准差为: 27287.81011733994总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:50 平均值:100000Server-Name:192.168.0.1 Cache-num: 112108Server-Name:192.168.0.2 Cache-num: 116560Server-Name:192.168.0.3 Cache-num: 85448Server-Name:192.168.0.4 Cache-num: 105785Server-Name:192.168.0.5 Cache-num: 87262Server-Name:192.168.0.6 Cache-num: 90845Server-Name:192.168.0.7 Cache-num: 89956Server-Name:192.168.0.8 Cache-num: 133565Server-Name:192.168.0.9 Cache-num: 89618Server-Name:192.168.0.10 Cache-num: 88853标准差为: 16233.25651248079总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:100 平均值:100000Server-Name:192.168.0.1 Cache-num: 91865Server-Name:192.168.0.2 Cache-num: 117698Server-Name:192.168.0.3 Cache-num: 87517Server-Name:192.168.0.4 Cache-num: 92990Server-Name:192.168.0.5 Cache-num: 90695Server-Name:192.168.0.6 Cache-num: 88082Server-Name:192.168.0.7 Cache-num: 108513Server-Name:192.168.0.8 Cache-num: 114522Server-Name:192.168.0.9 Cache-num: 109090Server-Name:192.168.0.10 Cache-num: 99028标准差为: 11449.97379909666总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:150 平均值:100000Server-Name:192.168.0.1 Cache-num: 98153Server-Name:192.168.0.2 Cache-num: 116658Server-Name:192.168.0.3 Cache-num: 96638Server-Name:192.168.0.4 Cache-num: 100941Server-Name:192.168.0.5 Cache-num: 98168Server-Name:192.168.0.6 Cache-num: 92365Server-Name:192.168.0.7 Cache-num: 95321Server-Name:192.168.0.8 Cache-num: 94687Server-Name:192.168.0.9 Cache-num: 117472Server-Name:192.168.0.10 Cache-num: 89597标准差为: 9535.620640524663总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:200 平均值:100000Server-Name:192.168.0.1 Cache-num: 99807Server-Name:192.168.0.2 Cache-num: 102766Server-Name:192.168.0.3 Cache-num: 101971Server-Name:192.168.0.4 Cache-num: 103664Server-Name:192.168.0.5 Cache-num: 95750Server-Name:192.168.0.6 Cache-num: 94986Server-Name:192.168.0.7 Cache-num: 100927Server-Name:192.168.0.8 Cache-num: 90744Server-Name:192.168.0.9 Cache-num: 120784Server-Name:192.168.0.10 Cache-num: 88601标准差为: 8923.409662231135总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:500 平均值:100000Server-Name:192.168.0.1 Cache-num: 107026Server-Name:192.168.0.2 Cache-num: 100697Server-Name:192.168.0.3 Cache-num: 98181Server-Name:192.168.0.4 Cache-num: 101271Server-Name:192.168.0.5 Cache-num: 96521Server-Name:192.168.0.6 Cache-num: 97214Server-Name:192.168.0.7 Cache-num: 99982Server-Name:192.168.0.8 Cache-num: 93274Server-Name:192.168.0.9 Cache-num: 106760Server-Name:192.168.0.10 Cache-num: 99074标准差为: 4300.3152210041535总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:1000 平均值:100000Server-Name:192.168.0.1 Cache-num: 99394Server-Name:192.168.0.2 Cache-num: 98163Server-Name:192.168.0.3 Cache-num: 96362Server-Name:192.168.0.4 Cache-num: 98742Server-Name:192.168.0.5 Cache-num: 102872Server-Name:192.168.0.6 Cache-num: 103469Server-Name:192.168.0.7 Cache-num: 97127Server-Name:192.168.0.8 Cache-num: 97118Server-Name:192.168.0.9 Cache-num: 106913Server-Name:192.168.0.10 Cache-num: 99840标准差为: 3386.525210300375总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:1500 平均值:100000Server-Name:192.168.0.1 Cache-num: 101235Server-Name:192.168.0.2 Cache-num: 96578Server-Name:192.168.0.3 Cache-num: 101237Server-Name:192.168.0.4 Cache-num: 94736Server-Name:192.168.0.5 Cache-num: 105713Server-Name:192.168.0.6 Cache-num: 99410Server-Name:192.168.0.7 Cache-num: 100591Server-Name:192.168.0.8 Cache-num: 100123Server-Name:192.168.0.9 Cache-num: 101931Server-Name:192.168.0.10 Cache-num: 98446标准差为: 3017.90440537801总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:2000 平均值:100000Server-Name:192.168.0.1 Cache-num: 100787Server-Name:192.168.0.2 Cache-num: 96876Server-Name:192.168.0.3 Cache-num: 102202Server-Name:192.168.0.4 Cache-num: 96784Server-Name:192.168.0.5 Cache-num: 108713Server-Name:192.168.0.6 Cache-num: 98230Server-Name:192.168.0.7 Cache-num: 99731Server-Name:192.168.0.8 Cache-num: 98804Server-Name:192.168.0.9 Cache-num: 99857Server-Name:192.168.0.10 Cache-num: 98016标准差为: 3497.283946150212Process finished with exit code 0
参考
https://xie.infoq.cn/article/8651e7b5ef05177005b474dfc
划线
评论
复制
发布于: 2020 年 07 月 08 日 阅读数: 43
版权声明: 本文为 InfoQ 作者【John(易筋)】的原创文章。
原文链接:【http://xie.infoq.cn/article/fb3c2581ebfeb29d81fa5d83d】。
本文遵守【CC-BY 4.0】协议,转载请保留原文出处及本版权声明。
John(易筋)
关注
问渠那得清如许?为有源头活水来 2018.07.17 加入
工作10+年,架构师,曾经阿里巴巴资深无线开发,汇丰银行架构师/专家。开发过日活过亿的淘宝Taobao App,擅长架构、算法、数据结构、设计模式、iOS、Java Spring Boot。易筋为阿里巴巴花名。
评论