1
Week5 命题作业
发布于: 2020 年 07 月 03 日
用你熟悉的编程语言实现一致性 hash 算法。
编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。
解答:
代码实现
HashUtils.java
package hash;public class HashUtils { /** * 计算Hash值, 使用FNV1_32_HASH算法 * * @param str * @return hash值 */ public static int getHash(String str) { final int p = 16777619; int hash = (int) 2166136261L; for (int i = 0; i < str.length(); i++) { hash = (hash ^ str.charAt(i)) * p; } hash += hash << 13; hash ^= hash >> 7; hash += hash << 3; hash ^= hash >> 17; hash += hash << 5; if (hash < 0) { hash = Math.abs(hash); } return hash; }}
ConsistentHash.java
package hash;import java.util.HashMap;import java.util.Map;import java.util.SortedMap;import java.util.TreeMap;import java.util.UUID;public class ConsistentHash { public static String getServer(String key, SortedMap<Integer, String> virtualNodes) { int hash = HashUtils.getHash(key); // 取出所有大于hash值的部分 SortedMap<Integer, String> subMap = virtualNodes.tailMap(hash); if (subMap.isEmpty()) { // hash值在最尾部,应该映射到第一个group上 String virtualNode = virtualNodes.get(virtualNodes.firstKey()); return virtualNode.substring(0, virtualNode.indexOf("&&")); } String virtualNode = subMap.get(subMap.firstKey()); return virtualNode.substring(0, virtualNode.indexOf("&&")); } public static void main(String[] args) { /** 服务器节点列表 */ String[] addresses = { "Node1", "Node2", "Node3", "Node4", "Node5", "Node6", "Node7", "Node8", "Node9", "Node10" }; /** 虚拟节点数 */ int VIRTUAL_NODES[] = { 1, 100, 1000, 10000, 100000, 200000, 300000, 400000, 500000, 600000, 700000, 800000, 900000, 1000000 }; for (int i = 0; i < VIRTUAL_NODES.length; i++) { SortedMap<Integer, String> virtualNodes = new TreeMap<>(); for (String realNode : addresses) { for (int j = 0; j < VIRTUAL_NODES[i]; j++) { String virtualNodeName = realNode + "&&VN" + String.valueOf(j); int hash = HashUtils.getHash(virtualNodeName); // System.out.println("虚拟节点[" + virtualNodeName + "]被添加, hash值为" + hash); virtualNodes.put(hash, virtualNodeName); } } // 生成1000000个数据进行测试 Map<String, Integer> resMap = new HashMap<>(); for (int k = 0; k < 1000000; k++) { String server = getServer(UUID.randomUUID().toString(), virtualNodes); if (resMap.containsKey(server)) { resMap.put(server, resMap.get(server) + 1); } else { resMap.put(server, 1); } } long total = 0; for (Map.Entry<String, Integer> entry : resMap.entrySet()) { String key = entry.getKey(); int value = entry.getValue(); total += (value - 100000) * (value - 100000); System.out.println("server: " + key + " count: " + value + "(" + value / 10000.0D + "%)"); } System.out.printf("%d个虚拟节点的标准差:%.2f\n", VIRTUAL_NODES[i], Math.sqrt(total * 1.0D / 9.0D)); } }}
标准差与虚拟节点数的关系
不同虚拟节点数的运行结果
server: Node1 count: 57008(5.7008%)server: Node10 count: 32869(3.2869%) server: Node9 count: 25098(2.5098%) server: Node8 count: 246871(24.6871%)server: Node7 count: 57793(5.7793%) server: Node6 count: 55001(5.5001%) server: Node5 count: 171823(17.1823%)server: Node4 count: 46792(4.6792%) server: Node3 count: 231128(23.1128%)server: Node2 count: 75617(7.5617%) 1个虚拟节点的标准差:28454.19server: Node1 count: 92017(9.2017%)server: Node10 count: 95557(9.5557%) server: Node9 count: 92931(9.2931%) server: Node8 count: 93322(9.3322%) server: Node7 count: 98528(9.8528%) server: Node6 count: 108408(10.8408%)server: Node5 count: 105534(10.5534%)server: Node4 count: 105056(10.5056%)server: Node3 count: 108663(10.8663%)server: Node2 count: 99984(9.9984%) 100个虚拟节点的标准差:6516.07 server: Node1 count: 102310(10.231%)server: Node10 count: 97672(9.7672%) server: Node9 count: 99932(9.9932%) server: Node8 count: 95475(9.5475%) server: Node7 count: 99696(9.9696%) server: Node6 count: 97245(9.7245%) server: Node5 count: 102953(10.2953%)server: Node4 count: 98557(9.8557%) server: Node3 count: 107236(10.7236%)server: Node2 count: 98924(9.8924%) 1000个虚拟节点的标准差:3386.88 server: Node1 count: 99290(9.929%)server: Node10 count: 101813(10.1813%)server: Node9 count: 100108(10.0108%) server: Node8 count: 100661(10.0661%)server: Node7 count: 98908(9.8908%)server: Node6 count: 98649(9.8649%)server: Node5 count: 99874(9.9874%)server: Node4 count: 100263(10.0263%)server: Node3 count: 99912(9.9912%)server: Node2 count: 100522(10.0522%)10000个虚拟节点的标准差:920.30server: Node1 count: 99028(9.9028%)server: Node10 count: 99956(9.9956%)server: Node9 count: 99817(9.9817%)server: Node8 count: 100646(10.0646%)server: Node7 count: 100384(10.0384%)server: Node6 count: 99837(9.9837%)server: Node5 count: 100480(10.048%)server: Node4 count: 99532(9.9532%)server: Node3 count: 100143(10.0143%)server: Node2 count: 100177(10.0177%)100000个虚拟节点的标准差:479.90server: Node1 count: 99843(9.9843%)server: Node10 count: 100624(10.0624%)server: Node9 count: 99136(9.9136%)server: Node8 count: 100312(10.0312%)server: Node7 count: 100233(10.0233%)server: Node6 count: 100418(10.0418%)server: Node5 count: 100238(10.0238%)server: Node4 count: 100103(10.0103%)server: Node3 count: 99509(9.9509%)server: Node2 count: 99584(9.9584%)200000个虚拟节点的标准差:467.65server: Node1 count: 99853(9.9853%)server: Node10 count: 100389(10.0389%)server: Node9 count: 100222(10.0222%)server: Node8 count: 99819(9.9819%)server: Node7 count: 100568(10.0568%)server: Node6 count: 100355(10.0355%)server: Node5 count: 100088(10.0088%)server: Node4 count: 100217(10.0217%)server: Node3 count: 99261(9.9261%)server: Node2 count: 99228(9.9228%)300000个虚拟节点的标准差:459.54server: Node1 count: 100192(10.0192%)server: Node10 count: 100197(10.0197%)server: Node9 count: 100003(10.0003%)server: Node8 count: 100481(10.0481%)server: Node7 count: 100126(10.0126%)server: Node6 count: 100285(10.0285%)server: Node5 count: 99829(9.9829%)server: Node4 count: 99559(9.9559%)server: Node3 count: 100113(10.0113%)server: Node2 count: 99215(9.9215%)400000个虚拟节点的标准差:373.70server: Node1 count: 99826(9.9826%)server: Node10 count: 100623(10.0623%)server: Node9 count: 99417(9.9417%)server: Node8 count: 100416(10.0416%)server: Node7 count: 100334(10.0334%)server: Node6 count: 99879(9.9879%)server: Node5 count: 99874(9.9874%)server: Node4 count: 100158(10.0158%)server: Node3 count: 100039(10.0039%)server: Node2 count: 99434(9.9434%)500000个虚拟节点的标准差:397.25server: Node1 count: 100128(10.0128%)server: Node10 count: 100244(10.0244%)server: Node9 count: 99990(9.999%) server: Node8 count: 99997(9.9997%) server: Node7 count: 100225(10.0225%) server: Node6 count: 100084(10.0084%) server: Node5 count: 100151(10.0151%) server: Node4 count: 99870(9.987%) server: Node3 count: 99898(9.9898%) server: Node2 count: 99413(9.9413%) 600000个虚拟节点的标准差:242.30 server: Node1 count: 99897(9.9897%)server: Node10 count: 100117(10.0117%)server: Node9 count: 99831(9.9831%) server: Node8 count: 100161(10.0161%)server: Node7 count: 100194(10.0194%)server: Node6 count: 100373(10.0373%)server: Node5 count: 100091(10.0091%)server: Node4 count: 99613(9.9613%)server: Node3 count: 99776(9.9776%)server: Node2 count: 99947(9.9947%)700000个虚拟节点的标准差:227.69server: Node1 count: 100202(10.0202%)server: Node10 count: 100206(10.0206%)server: Node9 count: 99933(9.9933%)server: Node8 count: 99982(9.9982%)server: Node7 count: 100019(10.0019%)server: Node6 count: 99823(9.9823%)server: Node5 count: 99507(9.9507%)server: Node4 count: 100471(10.0471%)server: Node3 count: 100220(10.022%)server: Node2 count: 99637(9.9637%)800000个虚拟节点的标准差:291.51server: Node1 count: 99456(9.9456%)server: Node10 count: 100239(10.0239%)server: Node9 count: 100458(10.0458%)server: Node8 count: 100365(10.0365%)server: Node7 count: 100316(10.0316%)server: Node6 count: 99410(9.941%)server: Node5 count: 99689(9.9689%)server: Node4 count: 99854(9.9854%)server: Node3 count: 100002(10.0002%)server: Node2 count: 100211(10.0211%)900000个虚拟节点的标准差:381.02server: Node1 count: 99895(9.9895%)server: Node10 count: 100196(10.0196%)server: Node9 count: 100270(10.027%)server: Node8 count: 99871(9.9871%)server: Node7 count: 100819(10.0819%)server: Node6 count: 99792(9.9792%)server: Node5 count: 99827(9.9827%)server: Node4 count: 99872(9.9872%)server: Node3 count: 99632(9.9632%)server: Node2 count: 99826(9.9826%)1000000个虚拟节点的标准差:344.00
ECharts可视化
横轴:虚拟节点数;
纵轴:标准差
小结
由绘图可以观察到:
在一定范围内,随着虚拟节点数的增大,标准差急剧降低;超过某个点后,再增加虚拟节点数,标准差将在某个值附近小幅度波动,并不会继续下降,说明有极限值。
综上,必须根据场景,通过实践找到合适的、合理的虚拟节点数,不能过少,也不能一味增加,需要综合考虑效率与成本。
划线
评论
复制
发布于: 2020 年 07 月 03 日阅读数: 74
版权声明: 本文为 InfoQ 作者【星河寒水】的原创文章。
原文链接:【http://xie.infoq.cn/article/d625b9853c8bab7d24941580c】。未经作者许可,禁止转载。
星河寒水
关注
还未添加个人签名 2018.09.17 加入
还未添加个人简介
评论