架构 0 期 -week5- 命题作业

发布于: 2020 年 07 月 10 日

  • 用你熟悉的编程语言实现一致性 hash 算法。

  • 编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。

public class ConsistentHash {
private final List<String> realNodes;
public ConsistentHash(List<String> realNodes) {
this.realNodes = realNodes;
}
public static Set<String> buildCacheKeys(int n) {
Set<String> keySet = new HashSet<>();
while (keySet.size() < n) {
String key = UUID.randomUUID().toString();
if (keySet.contains(key)) {
continue;
}
keySet.add(key);
}
return keySet;
}
public static void main(String[] args) {
ImmutableList<String> realNodes = ImmutableList.of(
"10.9.4.10",
"10.9.4.11",
"10.9.4.12",
"10.9.4.13",
"10.9.4.14",
"10.9.4.15",
"10.9.4.16",
"10.9.4.17",
"10.9.4.18",
"10.9.4.19");
// 1.生成 keyCount 个 key
int keyCount = 1_000_000;
Set<String> cacheKeys = ConsistentHash.buildCacheKeys(keyCount);
ConsistentHash consistentHash = new ConsistentHash(realNodes);
double min = Double.MAX_VALUE;
int suitableVirtualNodesPerRealNode = 0;
int lowest = 220;
int highest = 270;
for (int i = lowest; i <= highest; i++) {
// 2.按每个节点扩展到 i 个虚拟节点,构建虚拟节点(的 hash 值)对应真实节点的映射
SortedMap<Integer, String> virtualHashToNodeMap = consistentHash.buildVirtualHashToNode(i);
// 3.把 keyCount 个 key 分布到真实节点上去,得到真是节点分配的 key 数量
Map<String, Integer> realNodeToKeyCountMap = consistentHash.distributeKeys(cacheKeys, virtualHashToNodeMap);
Set<Double> keyCountPerRealNodeSet = realNodeToKeyCountMap.values().stream().map(Double::new).collect(Collectors.toSet());
// 4.计算真实节点分配到的 key 数量的样本标准差
double std = CalSta.sampleStdDev(keyCountPerRealNodeSet);
// 5.寻找最小标准差及虚拟节点数
if (min > std) {
min = std;
suitableVirtualNodesPerRealNode = i;
}
if (i % 5 == 0) {
System.out.println(String.format("i: %s, std: %s", i, std));
}
}
System.out.println(String.format("suitableVirtualNodesPerRealNode: %s, StdDeviation: %s", suitableVirtualNodesPerRealNode, min));
}
public SortedMap<Integer, String> buildVirtualHashToNode(int virtualNodePerRealNode) {
SortedMap<Integer, String> virtualHashToNodeMap = new TreeMap<>();
for (String node : realNodes) {
for (int i = 0; i < virtualNodePerRealNode; i++) {
String vNodeName = node + "&&VN" + i;
int hash = getHash(vNodeName);
virtualHashToNodeMap.put(hash, node);
}
}
return virtualHashToNodeMap;
}
public Map<String, Integer> distributeKeys(Set<String> keys, SortedMap<Integer, String> virtualHashToNodeMap) {
Map<String, Integer> realNodeToKeyCountMap = Maps.newHashMap();
for (String s : keys) {
int hash = getHash(s);
SortedMap<Integer, String> subMap = virtualHashToNodeMap.tailMap(hash);
String node = subMap.isEmpty() ? virtualHashToNodeMap.get(virtualHashToNodeMap.firstKey()) : subMap.get(subMap.firstKey());
Integer keyCount = realNodeToKeyCountMap.getOrDefault(node, 0);
realNodeToKeyCountMap.put(node, keyCount + 1);
}
return realNodeToKeyCountMap;
}
public int getHash(String str) {
final int p = 16777619;
int hash = (int) 2166136261L;
for (int i = 0; i < str.length(); i++) {
hash = (hash ^ str.charAt(i)) * p;
}
hash += hash << 13;
hash ^= hash >> 7;
hash += hash << 3;
hash ^= hash >> 17;
hash += hash << 5;
if (hash < 0) {
hash = Math.abs(hash);
}
return hash;
}
}

/**
* https://blog.csdn.net/l1028386804/article/details/54573106
* @author chenjun
*/
public class CalSta {
public static void main(String[] args) {
Set<Double> testData = Sets.newHashSet(2d, 4d, 6d, 7d, 8d, 9d, 12d, 36d);
System.out.println("总和Sum " + CalSta.sum(testData));
System.out.println("平均值Mean " + CalSta.mean(testData));
System.out.println("总体方差Population Variance " + CalSta.popVariance(testData));
System.out.println("总体标准差Population STD_dev " + CalSta.popStdDev(testData));
System.out.println("样本方差Sample Variance " + CalSta.sampleVariance(testData));
System.out.println("样本标准差Sample STD_dev " + CalSta.sampleStdDev(testData));
}
public static double sum(Set<Double> data) {
double sum = 0;
for (double datum : data) {
sum = sum + datum;
}
return sum;
}
public static double mean(Set<Double> data) {
double mean;
mean = sum(data) / data.size();
return mean;
}
/**
* population variance 总体方差
*
* @param data 数据
* @return 总体方差
*/
public static double popVariance(Set<Double> data) {
double variance = 0;
for (double datum : data) {
variance = variance + (Math.pow((datum - mean(data)), 2));
}
variance = variance / data.size();
return variance;
}
/**
* population standard deviation 总体标准差
*
* @param data 数据
* @return 样本标准差
*/
public static double popStdDev(Set<Double> data) {
double stdDev;
stdDev = Math.sqrt(popVariance(data));
return stdDev;
}
/**
* sample variance 样本方差
*
* @param data 数据
* @return 样本标准差
*/
public static double sampleVariance(Set<Double> data) {
double variance = 0;
for (double datum : data) {
variance = variance + (Math.pow((datum - mean(data)), 2));
}
variance = variance / (data.size() - 1);
return variance;
}
/**
* sample standard deviation 样本标准差
*
* @param data 数据
* @return 样本标准差
*/
public static double sampleStdDev(Set<Double> data) {
double stdDev;
stdDev = Math.sqrt(sampleVariance(data));
return stdDev;
}
}

用户头像

陈俊

关注

还未添加个人签名 2017.09.10 加入

还未添加个人简介

评论

发布
暂无评论
架构 0 期 -week5- 命题作业