第五周作业
发布于: 2020 年 10 月 25 日
用你熟悉的编程语言实现一致性 hash 算法。
编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。
一致性哈希算法实现:
public class ConsistantHashing {
// 虚拟节点数
private int virtualNodeNum;
private Comparator<Node> comparator = Comparator.comparingInt(o -> o.virtualNodeIndex);
private TreeSet<Node> virtualNodeRing = new TreeSet<>(comparator);
private List<String> realNodes = new ArrayList<>();
public ConsistantHashing(int virtualNodeNum) {
this.virtualNodeNum = virtualNodeNum;
}
public List<String> getRealNodes() {
return realNodes;
}
public String route(String key) {
int index = getHash(key);
Node node = findNode(index);
// System.out.println("key:" + key + ", index:" + index + ", node:" + node.nodeValue);
return node.nodeValue;
}
private Node findNode(int index) {
Node higher = virtualNodeRing.higher(new Node(index, index, null));
if (higher == null) {
return virtualNodeRing.first();
}
return higher;
}
public void addNode(String nodeValue) {
List<Node> nodeList = virtualization(nodeValue);
virtualNodeRing.addAll(nodeList);
realNodes.add(nodeValue);
}
private List<Node> virtualization(String nodeValue) {
int realNodeIndex = getHash(nodeValue);
List<Node> list = new ArrayList<>();
for (int i = 0; i < virtualNodeNum; i++) {
String virtualNodeValue = nodeValue + "#" + i;
int virtualNodeIndex = getHash(virtualNodeValue);
Node node = new Node(virtualNodeIndex, realNodeIndex, nodeValue);
list.add(node);
}
return list;
}
public void deleteNode(String nodeValue) {
List<Node> nodeList = virtualization(nodeValue);
virtualNodeRing.removeAll(nodeList);
}
public static class Node {
int virtualNodeIndex;
int realNodeIndex;
String nodeValue;
Node(int virtualNodeIndex, int realNodeValue, String nodeValue) {
this.virtualNodeIndex = virtualNodeIndex;
this.realNodeIndex = realNodeValue;
this.nodeValue = nodeValue;
}
}
/**
* 使用FNV1_32_HASH算法计算服务器的Hash值
*/
private static int getHash(String str) {
final int p = 16777619;
int hash = (int) 2166136261L;
for (int i = 0; i < str.length(); i++) {
hash = (hash ^ str.charAt(i)) * p;
}
hash += hash << 13;
hash ^= hash >> 7;
hash += hash << 3;
hash ^= hash >> 17;
hash += hash << 5;
// 如果算出来的值为负数则取其绝对值
if (hash < 0) {
hash = Math.abs(hash);
}
return hash;
}
public void printAllNodes() {
virtualNodeRing.forEach((node -> {
System.out.println("node:" + node.nodeValue + ", virtualNodeIndex:" + node.virtualNodeIndex + ", realNodeIndex:" + node.realNodeIndex);
}));
}
}
复制代码
测试用例:
public class ConsistantHashingTests {
@Before
public void before() {
System.out.println("before test");
}
static int[] samples = new int[1_000_000];
static {
Random random = new Random();
for (int i = 0; i < 1_000_000; i++) {
samples[i] = random.nextInt();
}
}
@Test
public void testVirtualNodeNum_10() {
testVirtualNodeNum(10);
}
private void testVirtualNodeNum(int virtualNodeNum) {
ConsistantHashing consistantHashing = new ConsistantHashing(virtualNodeNum);
Random random = new Random();
for (int i = 0; i < 10; i++) {
String nodeValue = String.valueOf(random.nextInt());
consistantHashing.addNode(nodeValue);
}
/*consistantHashing.addNode("192.168.0.1");
consistantHashing.addNode("192.168.0.2");
consistantHashing.addNode("192.168.0.3");
consistantHashing.addNode("192.168.0.4");
consistantHashing.addNode("192.168.0.5");
consistantHashing.addNode("192.168.0.6");
consistantHashing.addNode("192.168.0.7");
consistantHashing.addNode("192.168.0.8");
consistantHashing.addNode("192.168.0.9");
consistantHashing.addNode("192.168.0.10");*/
// consistantHashing.printAllNodes();
Map<String, Double> count = new HashMap<>();
for (int i = 0; i < samples.length; i++) {
String nodeValue = consistantHashing.route(String.valueOf(samples[i]));
count.put(nodeValue, count.getOrDefault(nodeValue, 0d) + 1);
}
double[] array = new double[10];
int index = 0;
for (String nodeValue : consistantHashing.getRealNodes()) {
Double cnt = count.getOrDefault(nodeValue, 0d);
array[index++] = cnt;
System.out.println("node:" + nodeValue + ", count:" + cnt);
}
double result = standardDiviation(array);
System.out.println("virtualNodeNum:" + virtualNodeNum + ", standardDiviation:" + result);
}
private static double standardDiviation(double[] x) {
int m = x.length;
double sum = 0;
for (int i = 0; i < m; i++) { // 求和
sum += x[i];
}
double dAve = sum / m; // 求平均值
double dVar = 0;
for (int i = 0; i < m; i++) { // 求方差
dVar += (x[i] - dAve) * (x[i] - dAve);
}
return Math.sqrt(dVar / m);
}
@Test
public void testVirtualNodeNum_50() {
testVirtualNodeNum(50);
}
@Test
public void testVirtualNodeNum_100() {
testVirtualNodeNum(100);
}
@Test
public void testVirtualNodeNum_150() {
testVirtualNodeNum(150);
}
@Test
public void testVirtualNodeNum_200() {
testVirtualNodeNum(200);
}
}
复制代码
基于随机值测试结果:
before test
node:-363851994, count:76842.0
node:-1007203159, count:71899.0
node:480062607, count:128698.0
node:-278208076, count:140297.0
node:1812442287, count:117302.0
node:1601083000, count:88377.0
node:-302361225, count:63051.0
node:756848540, count:84786.0
node:1258695797, count:118109.0
node:-218010507, count:110639.0
virtualNodeNum:10, standardDiviation:24991.30750881194
before test
node:-786945317, count:99318.0
node:-1480713590, count:88469.0
node:1166270450, count:97759.0
node:-511617267, count:103281.0
node:-1187947371, count:100899.0
node:-172133583, count:99933.0
node:2052371569, count:89601.0
node:-1201038167, count:104076.0
node:-1348917455, count:99179.0
node:-549306187, count:117485.0
virtualNodeNum:50, standardDiviation:7623.523201250194
before test
node:1988760841, count:89849.0
node:-2031834829, count:105710.0
node:1837961962, count:87069.0
node:682367618, count:88114.0
node:1326095879, count:105669.0
node:-498940234, count:96078.0
node:8679918, count:112100.0
node:1048357193, count:104753.0
node:-1567675432, count:104694.0
node:-1325258862, count:105964.0
virtualNodeNum:100, standardDiviation:8475.011764003635
before test
node:1346085706, count:104200.0
node:-256719750, count:99556.0
node:-1459894035, count:96762.0
node:1963214592, count:114203.0
node:-1509500490, count:105221.0
node:-1118110135, count:94928.0
node:1788974544, count:99032.0
node:157108460, count:99124.0
node:1922365323, count:104066.0
node:-900891646, count:82908.0
virtualNodeNum:150, standardDiviation:7703.273550900292
before test
node:1001831880, count:104011.0
node:-364242605, count:92376.0
node:-1697498937, count:106230.0
node:-297943883, count:93990.0
node:2102747307, count:93053.0
node:1216498465, count:102243.0
node:-1393007427, count:107546.0
node:-37892329, count:95786.0
node:-1171086003, count:95220.0
node:-239141897, count:109545.0
virtualNodeNum:200, standardDiviation:6253.748411952627
复制代码
基于 IP 测试结果:
before test
node:192.168.0.1, count:150271.0
node:192.168.0.2, count:75852.0
node:192.168.0.3, count:89639.0
node:192.168.0.4, count:65551.0
node:192.168.0.5, count:92315.0
node:192.168.0.6, count:87521.0
node:192.168.0.7, count:115555.0
node:192.168.0.8, count:142553.0
node:192.168.0.9, count:101927.0
node:192.168.0.10, count:78816.0
virtualNodeNum:10, standardDiviation:26691.49481014505
before test
node:192.168.0.1, count:101944.0
node:192.168.0.2, count:118162.0
node:192.168.0.3, count:107008.0
node:192.168.0.4, count:105944.0
node:192.168.0.5, count:76071.0
node:192.168.0.6, count:91134.0
node:192.168.0.7, count:91383.0
node:192.168.0.8, count:113428.0
node:192.168.0.9, count:113057.0
node:192.168.0.10, count:81869.0
virtualNodeNum:50, standardDiviation:13502.095615125823
before test
node:192.168.0.1, count:118010.0
node:192.168.0.2, count:108885.0
node:192.168.0.3, count:99231.0
node:192.168.0.4, count:93448.0
node:192.168.0.5, count:78765.0
node:192.168.0.6, count:95420.0
node:192.168.0.7, count:92800.0
node:192.168.0.8, count:103801.0
node:192.168.0.9, count:92392.0
node:192.168.0.10, count:117248.0
virtualNodeNum:100, standardDiviation:11577.511753395027
before test
node:192.168.0.1, count:100891.0
node:192.168.0.2, count:109293.0
node:192.168.0.3, count:86934.0
node:192.168.0.4, count:95876.0
node:192.168.0.5, count:96079.0
node:192.168.0.6, count:104394.0
node:192.168.0.7, count:95482.0
node:192.168.0.8, count:104179.0
node:192.168.0.9, count:95505.0
node:192.168.0.10, count:111367.0
virtualNodeNum:150, standardDiviation:7048.780163971635
before test
node:192.168.0.1, count:110023.0
node:192.168.0.2, count:110602.0
node:192.168.0.3, count:92369.0
node:192.168.0.4, count:102457.0
node:192.168.0.5, count:89611.0
node:192.168.0.6, count:105027.0
node:192.168.0.7, count:100324.0
node:192.168.0.8, count:92831.0
node:192.168.0.9, count:93512.0
node:192.168.0.10, count:103244.0
virtualNodeNum:200, standardDiviation:7172.525426932971
复制代码
对比上面两个测试结果,发现基于 IP 的虚拟节点,差不多在 150 个虚拟节点的时候,分布最均匀;而随机的虚拟节点,虚拟节点越多,分布越均匀。原因大概是,基于 IP 的虚拟节点,各虚拟节点之间的 nodeValue 差不多,Hash 算法算出来的节点分布没有随机的均匀。
最开始直接用 nodeValue 的 hashCode() 方法生成虚拟节点的位置,测试结果及其不均匀。
结论:Hash 算法决定了节点分布的均衡度。常用的 Hash 算法有:CRC32_HASH、FNV1_32_HASH、KETAMA_HASH 等。
划线
评论
复制
发布于: 2020 年 10 月 25 日阅读数: 33
羲
关注
还未添加个人签名 2018.11.22 加入
还未添加个人简介
评论