架构师训练营第 5 周作业
发布于: 2020 年 07 月 08 日
作业
用你熟悉的编程语言实现一致性 hash 算法。
编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。
内容
源码
// 哈希算法接口public interface HashAlgorithm { long hash(String string);}// CRC32哈希算法public class Crc32HashAlgorithm implements HashAlgorithm { ThreadLocal<CRC32> threadLocal = ThreadLocal.withInitial(() -> new CRC32()); @Override public long hash(String string) { CRC32 crc32 = threadLocal.get(); crc32.reset(); crc32.update(string.getBytes()); crc32.getValue(); return crc32.getValue(); }}// MD5哈希算法public class MD5HashAlgorithm implements HashAlgorithm { private MessageDigest instance; public MD5HashAlgorithm() { try { instance = MessageDigest.getInstance("MD5"); } catch (NoSuchAlgorithmException e) { } } @Override public long hash(String string) { instance.reset(); instance.update(string.getBytes()); byte[] digest = instance.digest(); long h = 0; for (int i = 0; i < 4; i++) { h <<= 8; h |= ((int) digest[i]) & 0xFF; } return h; }}// Murmur哈希算法public class MurmurHashAlgorithm implements HashAlgorithm { @Override public long hash(String string) { int seed = 0x9747b28c; byte[] data = string.getBytes(); int length = data.length; // 'm' and 'r' are mixing constants generated offline. // They're not really 'magic', they just happen to work well. final int m = 0x5bd1e995; final int r = 24; // Initialize the hash to a random value int h = seed^length; int length4 = length/4; for (int i=0; i<length4; i++) { final int i4 = i*4; int k = (data[i4+0]&0xff) +((data[i4+1]&0xff)<<8) +((data[i4+2]&0xff)<<16) +((data[i4+3]&0xff)<<24); k *= m; k ^= k >>> r; k *= m; h *= m; h ^= k; } // Handle the last few bytes of the input array switch (length%4) { case 3: h ^= (data[(length&~3) +2]&0xff) << 16; case 2: h ^= (data[(length&~3) +1]&0xff) << 8; case 1: h ^= (data[length&~3]&0xff); h *= m; } h ^= h >>> 13; h *= m; h ^= h >>> 15; return h; }}// 节点定义public class Node { @Getter private String name; private Map<String, String> cache; public Node(String name) { this.name = name; cache = new ConcurrentHashMap<>(); } public void put(String key, String value) { cache.put(key, value); } public int size() { return cache.size(); }}// 一致性哈希实现public class ConsistentHash { private List<Node> nodes; private int vNum; private HashAlgorithm algorithm; private SortedMap<Long, Node> virtualNodeMap; public ConsistentHash(List<Node> nodes, HashAlgorithm algorithm, int vNum) { this.nodes = nodes; this.algorithm = algorithm; this.vNum = vNum; virtualNodeMap = new TreeMap<>(); nodes.forEach(n -> { for (int i = 0; i < vNum; i++) { final String vNodeName = String.format("%s#%d", n.getName(), i); virtualNodeMap.put(algorithm.hash(vNodeName), n); } }); } public void put(String key, String value) { Node node = selectNode(key); node.put(key, value); } public void printSummary() { System.out.println("Summary:"); nodes.forEach(n -> { System.out.println("\t\tNode=" + n.getName() + ", Size=" + n.size()); }); } public double standardDeviation() { int total = nodes.stream().mapToInt(Node::size).sum(); if (total == 0) { return 0; } double avg = total * 1.0 / nodes.size(); final double sum = nodes.stream().mapToDouble(n -> Math.pow(n.size() * 1.0 - avg, 2.0)).sum(); return Math.sqrt(sum / nodes.size()); } private Node selectNode(String key) { long hash = algorithm.hash(key); SortedMap<Long, Node> subMap = virtualNodeMap.tailMap(hash); if (subMap.isEmpty()) { Long i = virtualNodeMap.firstKey(); return virtualNodeMap.get(i); } Long i = subMap.firstKey(); return subMap.get(i); }}// 单元测试public class ConsistentHashTest { public static final List<String> DEFAULT_NODE_NAMES = Arrays.asList("10.0.0.1", "10.0.0.2", "10.0.0.3", "10.0.0.4", "10.0.0.5", "10.0.0.6", "10.0.0.7", "10.0.0.8"); @Test public void testWithMurmurHash() { final HashAlgorithm algorithm = new MurmurHashAlgorithm(); runTestCases(algorithm); } @Test public void testWithMD5Hash() { final HashAlgorithm algorithm = new MD5HashAlgorithm(); runTestCases(algorithm); } @Test public void testWithCrc32Hash() { final HashAlgorithm algorithm = new Crc32HashAlgorithm(); runTestCases(algorithm); } @Test public void testWithHashCode() { final HashAlgorithm algorithm = s -> Math.abs(s.hashCode()); runTestCases(algorithm); } private void runTestCases(HashAlgorithm algorithm) { System.out.println("\n run test cases with algorithm = " + algorithm.getClass().getName()); testWith(DEFAULT_NODE_NAMES, algorithm, 1_000, uuidInitial); testWith(DEFAULT_NODE_NAMES, algorithm, 10_000, uuidInitial); testWith(DEFAULT_NODE_NAMES, algorithm, 100_000, uuidInitial); testWith(DEFAULT_NODE_NAMES, algorithm, 1_000, intInitial); testWith(DEFAULT_NODE_NAMES, algorithm, 10_000, intInitial); testWith(DEFAULT_NODE_NAMES, algorithm, 100_000, intInitial); } public void testWith(List<String> nodeNames, HashAlgorithm algorithm, int vNum, Consumer<ConsistentHash> initData) { final List<Node> nodes = nodeNames.stream().map(Node::new).collect(Collectors.toList()); ConsistentHash consistentHash = new ConsistentHash(nodes, algorithm, vNum); initData.accept(consistentHash);// consistentHash.printSummary(); System.out.println(format(consistentHash.standardDeviation(), nodeNames.size(), algorithm.getClass().getSimpleName(), vNum, initData)); } private Consumer<ConsistentHash> uuidInitial = consistentHash -> { for (int i = 0; i < 1_000_000; i++) { consistentHash.put(UUID.randomUUID().toString(), String.format("%d", i)); } }; private Consumer<ConsistentHash> intInitial = consistentHash -> { for (int i = 0; i < 1_000_000; i++) { consistentHash.put(String.format("%d", i), String.format("%d", i)); } }; private String format(double sd, int nodeSize, String hashName, int vNum, Consumer<ConsistentHash> dataInitial) { String dataType = "unknown"; if (dataInitial == uuidInitial) { dataType = "uuid"; } if (dataInitial == intInitial) { dataType = "int"; } return String.format("标准差: %,f, 节点数: %d, 哈希算法: %s, 虚拟节点数: %s, 数据集: %s", sd, nodeSize, hashName, vNum, dataType); }}
运行结果
划线
评论
复制
发布于: 2020 年 07 月 08 日 阅读数: 29
Season
关注
还未添加个人签名 2019.09.28 加入
还未添加个人简介
评论