写点什么

第五周作业

用户头像
关注
发布于: 2020 年 10 月 25 日
  1. 用你熟悉的编程语言实现一致性 hash 算法。

  2. 编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。


一致性哈希算法实现:

public class ConsistantHashing {
// 虚拟节点数 private int virtualNodeNum;
private Comparator<Node> comparator = Comparator.comparingInt(o -> o.virtualNodeIndex);
private TreeSet<Node> virtualNodeRing = new TreeSet<>(comparator);
private List<String> realNodes = new ArrayList<>();
public ConsistantHashing(int virtualNodeNum) {
this.virtualNodeNum = virtualNodeNum; }
public List<String> getRealNodes() { return realNodes; }
public String route(String key) {
int index = getHash(key); Node node = findNode(index); // System.out.println("key:" + key + ", index:" + index + ", node:" + node.nodeValue);
return node.nodeValue; }
private Node findNode(int index) {
Node higher = virtualNodeRing.higher(new Node(index, index, null)); if (higher == null) { return virtualNodeRing.first(); } return higher; }
public void addNode(String nodeValue) {
List<Node> nodeList = virtualization(nodeValue); virtualNodeRing.addAll(nodeList); realNodes.add(nodeValue); }
private List<Node> virtualization(String nodeValue) {
int realNodeIndex = getHash(nodeValue); List<Node> list = new ArrayList<>();
for (int i = 0; i < virtualNodeNum; i++) { String virtualNodeValue = nodeValue + "#" + i; int virtualNodeIndex = getHash(virtualNodeValue); Node node = new Node(virtualNodeIndex, realNodeIndex, nodeValue); list.add(node); }
return list; }
public void deleteNode(String nodeValue) {
List<Node> nodeList = virtualization(nodeValue); virtualNodeRing.removeAll(nodeList); }
public static class Node { int virtualNodeIndex; int realNodeIndex; String nodeValue;
Node(int virtualNodeIndex, int realNodeValue, String nodeValue) { this.virtualNodeIndex = virtualNodeIndex; this.realNodeIndex = realNodeValue; this.nodeValue = nodeValue; } }
/** * 使用FNV1_32_HASH算法计算服务器的Hash值 */ private static int getHash(String str) {
final int p = 16777619; int hash = (int) 2166136261L; for (int i = 0; i < str.length(); i++) { hash = (hash ^ str.charAt(i)) * p; } hash += hash << 13; hash ^= hash >> 7; hash += hash << 3; hash ^= hash >> 17; hash += hash << 5;
// 如果算出来的值为负数则取其绝对值 if (hash < 0) { hash = Math.abs(hash); } return hash; }
public void printAllNodes() {
virtualNodeRing.forEach((node -> { System.out.println("node:" + node.nodeValue + ", virtualNodeIndex:" + node.virtualNodeIndex + ", realNodeIndex:" + node.realNodeIndex); })); }}
复制代码

测试用例:

public class ConsistantHashingTests {
@Before public void before() { System.out.println("before test"); }
static int[] samples = new int[1_000_000]; static { Random random = new Random(); for (int i = 0; i < 1_000_000; i++) { samples[i] = random.nextInt(); } }
@Test public void testVirtualNodeNum_10() {
testVirtualNodeNum(10); }
private void testVirtualNodeNum(int virtualNodeNum) {
ConsistantHashing consistantHashing = new ConsistantHashing(virtualNodeNum); Random random = new Random(); for (int i = 0; i < 10; i++) { String nodeValue = String.valueOf(random.nextInt()); consistantHashing.addNode(nodeValue); } /*consistantHashing.addNode("192.168.0.1"); consistantHashing.addNode("192.168.0.2"); consistantHashing.addNode("192.168.0.3"); consistantHashing.addNode("192.168.0.4"); consistantHashing.addNode("192.168.0.5"); consistantHashing.addNode("192.168.0.6"); consistantHashing.addNode("192.168.0.7"); consistantHashing.addNode("192.168.0.8"); consistantHashing.addNode("192.168.0.9"); consistantHashing.addNode("192.168.0.10");*/
// consistantHashing.printAllNodes();
Map<String, Double> count = new HashMap<>(); for (int i = 0; i < samples.length; i++) { String nodeValue = consistantHashing.route(String.valueOf(samples[i])); count.put(nodeValue, count.getOrDefault(nodeValue, 0d) + 1); } double[] array = new double[10]; int index = 0; for (String nodeValue : consistantHashing.getRealNodes()) { Double cnt = count.getOrDefault(nodeValue, 0d); array[index++] = cnt; System.out.println("node:" + nodeValue + ", count:" + cnt);
}
double result = standardDiviation(array);
System.out.println("virtualNodeNum:" + virtualNodeNum + ", standardDiviation:" + result); }
private static double standardDiviation(double[] x) { int m = x.length; double sum = 0; for (int i = 0; i < m; i++) { // 求和 sum += x[i]; } double dAve = sum / m; // 求平均值 double dVar = 0; for (int i = 0; i < m; i++) { // 求方差 dVar += (x[i] - dAve) * (x[i] - dAve); } return Math.sqrt(dVar / m); }
@Test public void testVirtualNodeNum_50() { testVirtualNodeNum(50); }
@Test public void testVirtualNodeNum_100() { testVirtualNodeNum(100); }
@Test public void testVirtualNodeNum_150() {
testVirtualNodeNum(150); }
@Test public void testVirtualNodeNum_200() {
testVirtualNodeNum(200); }}
复制代码

基于随机值测试结果:

before testnode:-363851994, count:76842.0node:-1007203159, count:71899.0node:480062607, count:128698.0node:-278208076, count:140297.0node:1812442287, count:117302.0node:1601083000, count:88377.0node:-302361225, count:63051.0node:756848540, count:84786.0node:1258695797, count:118109.0node:-218010507, count:110639.0virtualNodeNum:10, standardDiviation:24991.30750881194before testnode:-786945317, count:99318.0node:-1480713590, count:88469.0node:1166270450, count:97759.0node:-511617267, count:103281.0node:-1187947371, count:100899.0node:-172133583, count:99933.0node:2052371569, count:89601.0node:-1201038167, count:104076.0node:-1348917455, count:99179.0node:-549306187, count:117485.0virtualNodeNum:50, standardDiviation:7623.523201250194before testnode:1988760841, count:89849.0node:-2031834829, count:105710.0node:1837961962, count:87069.0node:682367618, count:88114.0node:1326095879, count:105669.0node:-498940234, count:96078.0node:8679918, count:112100.0node:1048357193, count:104753.0node:-1567675432, count:104694.0node:-1325258862, count:105964.0virtualNodeNum:100, standardDiviation:8475.011764003635before testnode:1346085706, count:104200.0node:-256719750, count:99556.0node:-1459894035, count:96762.0node:1963214592, count:114203.0node:-1509500490, count:105221.0node:-1118110135, count:94928.0node:1788974544, count:99032.0node:157108460, count:99124.0node:1922365323, count:104066.0node:-900891646, count:82908.0virtualNodeNum:150, standardDiviation:7703.273550900292before testnode:1001831880, count:104011.0node:-364242605, count:92376.0node:-1697498937, count:106230.0node:-297943883, count:93990.0node:2102747307, count:93053.0node:1216498465, count:102243.0node:-1393007427, count:107546.0node:-37892329, count:95786.0node:-1171086003, count:95220.0node:-239141897, count:109545.0virtualNodeNum:200, standardDiviation:6253.748411952627
复制代码

基于 IP 测试结果:

before testnode:192.168.0.1, count:150271.0node:192.168.0.2, count:75852.0node:192.168.0.3, count:89639.0node:192.168.0.4, count:65551.0node:192.168.0.5, count:92315.0node:192.168.0.6, count:87521.0node:192.168.0.7, count:115555.0node:192.168.0.8, count:142553.0node:192.168.0.9, count:101927.0node:192.168.0.10, count:78816.0virtualNodeNum:10, standardDiviation:26691.49481014505before testnode:192.168.0.1, count:101944.0node:192.168.0.2, count:118162.0node:192.168.0.3, count:107008.0node:192.168.0.4, count:105944.0node:192.168.0.5, count:76071.0node:192.168.0.6, count:91134.0node:192.168.0.7, count:91383.0node:192.168.0.8, count:113428.0node:192.168.0.9, count:113057.0node:192.168.0.10, count:81869.0virtualNodeNum:50, standardDiviation:13502.095615125823before testnode:192.168.0.1, count:118010.0node:192.168.0.2, count:108885.0node:192.168.0.3, count:99231.0node:192.168.0.4, count:93448.0node:192.168.0.5, count:78765.0node:192.168.0.6, count:95420.0node:192.168.0.7, count:92800.0node:192.168.0.8, count:103801.0node:192.168.0.9, count:92392.0node:192.168.0.10, count:117248.0virtualNodeNum:100, standardDiviation:11577.511753395027before testnode:192.168.0.1, count:100891.0node:192.168.0.2, count:109293.0node:192.168.0.3, count:86934.0node:192.168.0.4, count:95876.0node:192.168.0.5, count:96079.0node:192.168.0.6, count:104394.0node:192.168.0.7, count:95482.0node:192.168.0.8, count:104179.0node:192.168.0.9, count:95505.0node:192.168.0.10, count:111367.0virtualNodeNum:150, standardDiviation:7048.780163971635before testnode:192.168.0.1, count:110023.0node:192.168.0.2, count:110602.0node:192.168.0.3, count:92369.0node:192.168.0.4, count:102457.0node:192.168.0.5, count:89611.0node:192.168.0.6, count:105027.0node:192.168.0.7, count:100324.0node:192.168.0.8, count:92831.0node:192.168.0.9, count:93512.0node:192.168.0.10, count:103244.0virtualNodeNum:200, standardDiviation:7172.525426932971
复制代码

对比上面两个测试结果,发现基于 IP 的虚拟节点,差不多在 150 个虚拟节点的时候,分布最均匀;而随机的虚拟节点,虚拟节点越多,分布越均匀。原因大概是,基于 IP 的虚拟节点,各虚拟节点之间的 nodeValue 差不多,Hash 算法算出来的节点分布没有随机的均匀。


最开始直接用 nodeValue 的 hashCode() 方法生成虚拟节点的位置,测试结果及其不均匀。

结论:Hash 算法决定了节点分布的均衡度。常用的 Hash 算法有:CRC32_HASH、FNV1_32_HASH、KETAMA_HASH 等。


用户头像

关注

还未添加个人签名 2018.11.22 加入

还未添加个人简介

评论

发布
暂无评论
第五周作业