架构 0 期 -week5- 命题作业
发布于: 2020 年 07 月 10 日
用你熟悉的编程语言实现一致性 hash 算法。
编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。
public class ConsistentHash { private final List<String> realNodes; public ConsistentHash(List<String> realNodes) { this.realNodes = realNodes; } public static Set<String> buildCacheKeys(int n) { Set<String> keySet = new HashSet<>(); while (keySet.size() < n) { String key = UUID.randomUUID().toString(); if (keySet.contains(key)) { continue; } keySet.add(key); } return keySet; } public static void main(String[] args) { ImmutableList<String> realNodes = ImmutableList.of( "10.9.4.10", "10.9.4.11", "10.9.4.12", "10.9.4.13", "10.9.4.14", "10.9.4.15", "10.9.4.16", "10.9.4.17", "10.9.4.18", "10.9.4.19"); // 1.生成 keyCount 个 key int keyCount = 1_000_000; Set<String> cacheKeys = ConsistentHash.buildCacheKeys(keyCount); ConsistentHash consistentHash = new ConsistentHash(realNodes); double min = Double.MAX_VALUE; int suitableVirtualNodesPerRealNode = 0; int lowest = 220; int highest = 270; for (int i = lowest; i <= highest; i++) { // 2.按每个节点扩展到 i 个虚拟节点,构建虚拟节点(的 hash 值)对应真实节点的映射 SortedMap<Integer, String> virtualHashToNodeMap = consistentHash.buildVirtualHashToNode(i); // 3.把 keyCount 个 key 分布到真实节点上去,得到真是节点分配的 key 数量 Map<String, Integer> realNodeToKeyCountMap = consistentHash.distributeKeys(cacheKeys, virtualHashToNodeMap); Set<Double> keyCountPerRealNodeSet = realNodeToKeyCountMap.values().stream().map(Double::new).collect(Collectors.toSet()); // 4.计算真实节点分配到的 key 数量的样本标准差 double std = CalSta.sampleStdDev(keyCountPerRealNodeSet); // 5.寻找最小标准差及虚拟节点数 if (min > std) { min = std; suitableVirtualNodesPerRealNode = i; } if (i % 5 == 0) { System.out.println(String.format("i: %s, std: %s", i, std)); } } System.out.println(String.format("suitableVirtualNodesPerRealNode: %s, StdDeviation: %s", suitableVirtualNodesPerRealNode, min)); } public SortedMap<Integer, String> buildVirtualHashToNode(int virtualNodePerRealNode) { SortedMap<Integer, String> virtualHashToNodeMap = new TreeMap<>(); for (String node : realNodes) { for (int i = 0; i < virtualNodePerRealNode; i++) { String vNodeName = node + "&&VN" + i; int hash = getHash(vNodeName); virtualHashToNodeMap.put(hash, node); } } return virtualHashToNodeMap; } public Map<String, Integer> distributeKeys(Set<String> keys, SortedMap<Integer, String> virtualHashToNodeMap) { Map<String, Integer> realNodeToKeyCountMap = Maps.newHashMap(); for (String s : keys) { int hash = getHash(s); SortedMap<Integer, String> subMap = virtualHashToNodeMap.tailMap(hash); String node = subMap.isEmpty() ? virtualHashToNodeMap.get(virtualHashToNodeMap.firstKey()) : subMap.get(subMap.firstKey()); Integer keyCount = realNodeToKeyCountMap.getOrDefault(node, 0); realNodeToKeyCountMap.put(node, keyCount + 1); } return realNodeToKeyCountMap; } public int getHash(String str) { final int p = 16777619; int hash = (int) 2166136261L; for (int i = 0; i < str.length(); i++) { hash = (hash ^ str.charAt(i)) * p; } hash += hash << 13; hash ^= hash >> 7; hash += hash << 3; hash ^= hash >> 17; hash += hash << 5; if (hash < 0) { hash = Math.abs(hash); } return hash; }}
/** * https://blog.csdn.net/l1028386804/article/details/54573106 * @author chenjun */public class CalSta { public static void main(String[] args) { Set<Double> testData = Sets.newHashSet(2d, 4d, 6d, 7d, 8d, 9d, 12d, 36d); System.out.println("总和Sum " + CalSta.sum(testData)); System.out.println("平均值Mean " + CalSta.mean(testData)); System.out.println("总体方差Population Variance " + CalSta.popVariance(testData)); System.out.println("总体标准差Population STD_dev " + CalSta.popStdDev(testData)); System.out.println("样本方差Sample Variance " + CalSta.sampleVariance(testData)); System.out.println("样本标准差Sample STD_dev " + CalSta.sampleStdDev(testData)); } public static double sum(Set<Double> data) { double sum = 0; for (double datum : data) { sum = sum + datum; } return sum; } public static double mean(Set<Double> data) { double mean; mean = sum(data) / data.size(); return mean; } /** * population variance 总体方差 * * @param data 数据 * @return 总体方差 */ public static double popVariance(Set<Double> data) { double variance = 0; for (double datum : data) { variance = variance + (Math.pow((datum - mean(data)), 2)); } variance = variance / data.size(); return variance; } /** * population standard deviation 总体标准差 * * @param data 数据 * @return 样本标准差 */ public static double popStdDev(Set<Double> data) { double stdDev; stdDev = Math.sqrt(popVariance(data)); return stdDev; } /** * sample variance 样本方差 * * @param data 数据 * @return 样本标准差 */ public static double sampleVariance(Set<Double> data) { double variance = 0; for (double datum : data) { variance = variance + (Math.pow((datum - mean(data)), 2)); } variance = variance / (data.size() - 1); return variance; } /** * sample standard deviation 样本标准差 * * @param data 数据 * @return 样本标准差 */ public static double sampleStdDev(Set<Double> data) { double stdDev; stdDev = Math.sqrt(sampleVariance(data)); return stdDev; }}
划线
评论
复制
发布于: 2020 年 07 月 10 日阅读数: 76
陈俊
关注
还未添加个人签名 2017.09.10 加入
还未添加个人简介
评论