架构师训练营第 5 周作业

发布于: 2020 年 07 月 08 日

作业

用你熟悉的编程语言实现一致性 hash 算法。

编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。

内容

源码

// 哈希算法接口
public interface HashAlgorithm {
long hash(String string);
}
// CRC32哈希算法
public class Crc32HashAlgorithm implements HashAlgorithm {
ThreadLocal<CRC32> threadLocal = ThreadLocal.withInitial(() -> new CRC32());
@Override
public long hash(String string) {
CRC32 crc32 = threadLocal.get();
crc32.reset();
crc32.update(string.getBytes());
crc32.getValue();
return crc32.getValue();
}
}
// MD5哈希算法
public class MD5HashAlgorithm implements HashAlgorithm {
private MessageDigest instance;
public MD5HashAlgorithm() {
try {
instance = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException e) {
}
}
@Override
public long hash(String string) {
instance.reset();
instance.update(string.getBytes());
byte[] digest = instance.digest();
long h = 0;
for (int i = 0; i < 4; i++) {
h <<= 8;
h |= ((int) digest[i]) & 0xFF;
}
return h;
}
}
// Murmur哈希算法
public class MurmurHashAlgorithm implements HashAlgorithm {
@Override
public long hash(String string) {
int seed = 0x9747b28c;
byte[] data = string.getBytes();
int length = data.length;
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
final int m = 0x5bd1e995;
final int r = 24;
// Initialize the hash to a random value
int h = seed^length;
int length4 = length/4;
for (int i=0; i<length4; i++) {
final int i4 = i*4;
int k = (data[i4+0]&0xff) +((data[i4+1]&0xff)<<8)
+((data[i4+2]&0xff)<<16) +((data[i4+3]&0xff)<<24);
k *= m;
k ^= k >>> r;
k *= m;
h *= m;
h ^= k;
}
// Handle the last few bytes of the input array
switch (length%4) {
case 3: h ^= (data[(length&~3) +2]&0xff) << 16;
case 2: h ^= (data[(length&~3) +1]&0xff) << 8;
case 1: h ^= (data[length&~3]&0xff);
h *= m;
}
h ^= h >>> 13;
h *= m;
h ^= h >>> 15;
return h;
}
}
// 节点定义
public class Node {
@Getter
private String name;
private Map<String, String> cache;
public Node(String name) {
this.name = name;
cache = new ConcurrentHashMap<>();
}
public void put(String key, String value) {
cache.put(key, value);
}
public int size() {
return cache.size();
}
}
// 一致性哈希实现
public class ConsistentHash {
private List<Node> nodes;
private int vNum;
private HashAlgorithm algorithm;
private SortedMap<Long, Node> virtualNodeMap;
public ConsistentHash(List<Node> nodes, HashAlgorithm algorithm, int vNum) {
this.nodes = nodes;
this.algorithm = algorithm;
this.vNum = vNum;
virtualNodeMap = new TreeMap<>();
nodes.forEach(n -> {
for (int i = 0; i < vNum; i++) {
final String vNodeName = String.format("%s#%d", n.getName(), i);
virtualNodeMap.put(algorithm.hash(vNodeName), n);
}
});
}
public void put(String key, String value) {
Node node = selectNode(key);
node.put(key, value);
}
public void printSummary() {
System.out.println("Summary:");
nodes.forEach(n -> {
System.out.println("\t\tNode=" + n.getName() + ", Size=" + n.size());
});
}
public double standardDeviation() {
int total = nodes.stream().mapToInt(Node::size).sum();
if (total == 0) {
return 0;
}
double avg = total * 1.0 / nodes.size();
final double sum = nodes.stream().mapToDouble(n -> Math.pow(n.size() * 1.0 - avg, 2.0)).sum();
return Math.sqrt(sum / nodes.size());
}
private Node selectNode(String key) {
long hash = algorithm.hash(key);
SortedMap<Long, Node> subMap = virtualNodeMap.tailMap(hash);
if (subMap.isEmpty()) {
Long i = virtualNodeMap.firstKey();
return virtualNodeMap.get(i);
}
Long i = subMap.firstKey();
return subMap.get(i);
}
}
// 单元测试
public class ConsistentHashTest {
public static final List<String> DEFAULT_NODE_NAMES =
Arrays.asList("10.0.0.1", "10.0.0.2", "10.0.0.3", "10.0.0.4", "10.0.0.5", "10.0.0.6", "10.0.0.7", "10.0.0.8");
@Test
public void testWithMurmurHash() {
final HashAlgorithm algorithm = new MurmurHashAlgorithm();
runTestCases(algorithm);
}
@Test
public void testWithMD5Hash() {
final HashAlgorithm algorithm = new MD5HashAlgorithm();
runTestCases(algorithm);
}
@Test
public void testWithCrc32Hash() {
final HashAlgorithm algorithm = new Crc32HashAlgorithm();
runTestCases(algorithm);
}
@Test
public void testWithHashCode() {
final HashAlgorithm algorithm = s -> Math.abs(s.hashCode());
runTestCases(algorithm);
}
private void runTestCases(HashAlgorithm algorithm) {
System.out.println("\n run test cases with algorithm = " + algorithm.getClass().getName());
testWith(DEFAULT_NODE_NAMES, algorithm, 1_000, uuidInitial);
testWith(DEFAULT_NODE_NAMES, algorithm, 10_000, uuidInitial);
testWith(DEFAULT_NODE_NAMES, algorithm, 100_000, uuidInitial);
testWith(DEFAULT_NODE_NAMES, algorithm, 1_000, intInitial);
testWith(DEFAULT_NODE_NAMES, algorithm, 10_000, intInitial);
testWith(DEFAULT_NODE_NAMES, algorithm, 100_000, intInitial);
}
public void testWith(List<String> nodeNames, HashAlgorithm algorithm, int vNum, Consumer<ConsistentHash> initData) {
final List<Node> nodes = nodeNames.stream().map(Node::new).collect(Collectors.toList());
ConsistentHash consistentHash = new ConsistentHash(nodes, algorithm, vNum);
initData.accept(consistentHash);
// consistentHash.printSummary();
System.out.println(format(consistentHash.standardDeviation(), nodeNames.size(), algorithm.getClass().getSimpleName(), vNum, initData));
}
private Consumer<ConsistentHash> uuidInitial = consistentHash -> {
for (int i = 0; i < 1_000_000; i++) {
consistentHash.put(UUID.randomUUID().toString(), String.format("%d", i));
}
};
private Consumer<ConsistentHash> intInitial = consistentHash -> {
for (int i = 0; i < 1_000_000; i++) {
consistentHash.put(String.format("%d", i), String.format("%d", i));
}
};
private String format(double sd, int nodeSize, String hashName, int vNum, Consumer<ConsistentHash> dataInitial) {
String dataType = "unknown";
if (dataInitial == uuidInitial) {
dataType = "uuid";
}
if (dataInitial == intInitial) {
dataType = "int";
}
return String.format("标准差: %,f, 节点数: %d, 哈希算法: %s, 虚拟节点数: %s, 数据集: %s", sd, nodeSize, hashName, vNum, dataType);
}
}

运行结果

用户头像

Season

关注

还未添加个人签名 2019.09.28 加入

还未添加个人简介

评论

发布
暂无评论
架构师训练营第 5 周作业