Week5- 作业

发布于: 2020 年 07 月 08 日

用你熟悉的编程语言实现一致性 hash 算法。

编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。

package com.study.tmp;
import java.util.*;
class HashRel {
//
//服务器数量
private int host_count;
//虚拟节点数量
private int virtual_count;
SortedMap<Integer, String> circle_map;
public HashRel(int host_count, int virtual_count) {
circle_map = new TreeMap<Integer, String>();
this.host_count = host_count;
this.virtual_count = virtual_count;
}
//创建hash对应关系
public void getRel() {
//建立hashcode和主机的对应关系
for (int i = 0; i < host_count; i++) {
String host = "host" + "_" + String.valueOf(i);
for (int j = 0; j < virtual_count; j++) {
String host_virtual_name = host + "_" + String.valueOf(j);
int hash = 0;
System.out.println("host_virtual_name.hashcode:" + Math.abs(host_virtual_name.hashCode()));
hash = Math.abs(host_virtual_name.hashCode());
circle_map.put(hash, host);
}
}
}
//取真是节点
public String getHost(String data) {
int hash = data.hashCode();
hash = Math.abs(hash);
System.out.println("getHost hash:" + hash);
if (circle_map.isEmpty()) {
return null;
}
if (!circle_map.containsKey(hash)) {
SortedMap<Integer, String> tailMap = circle_map.tailMap(hash);// 沿环的顺时针找到一个虚拟节点
if (tailMap.isEmpty()) {
hash = circle_map.firstKey();
System.out.println("empty:" + hash);
} else {
hash = tailMap.firstKey();
System.out.println("not empty:" + hash);
}
}
System.out.println("目标主机:" + circle_map.get(hash));
return circle_map.get(hash);
}
@Override
public String toString() {
String str = "";
for (Map.Entry<Integer, String> entry : circle_map.entrySet()) {
System.out.println("Key = " + entry.getKey() + ", Value = " + entry.getValue());
str = str + "Key = " + entry.getKey() + ", Value = " + entry.getValue();
}
return str;
}
}
public class HashTest {
//求标准差
public double getStandardDeviation(Map<String, Integer> map) {
int sum = 0;
for (Map.Entry<String, Integer> entry : map.entrySet()) {
System.out.println("host = " + entry.getKey() + ", count = " + entry.getValue());
sum += entry.getValue(); //求出数组的总和
}
System.out.println(sum);
double average = sum / map.size(); //求出数组的平均数
System.out.println(average); //
int total = 0;
for (Map.Entry<String, Integer> entry2 : map.entrySet()) {
total += (entry2.getValue() - average) * (entry2.getValue() - average); //求出方差,如果要计算方差的话这一步就可以了
}
double standardDeviation = Math.sqrt(total / map.size()); //求出标准差
System.out.println(standardDeviation);
return standardDeviation;
}
public static void main(String[] args) {
int host_count = 10;
int virtual_count = 5000;
int data_count = 1000000;
HashTest hashTest = new HashTest();
//每台机器真是记录数
Map<String, Integer> map = new HashMap<String, Integer>();
//建立服务器与哈希值的对应关系
HashRel hashRel = new HashRel(host_count, virtual_count);
hashRel.getRel();
//hashRel.toString();
// String data = UUID.randomUUID().toString();
// hashRel.getHost(data);
//初始化所有的机器上数据为0
for (int i = 0; i < host_count; i++) {
String host = "host" + "_" + String.valueOf(i);
map.put(host, 0);
}
//遍历数据匹配对应主机
for (int j = 0; j < data_count; j++) {
String data = UUID.randomUUID().toString();
String host = hashRel.getHost(data);
// 每台真实机器节点上保存的记录条数加1
int count_tmp = map.get(host) + 1;
map.put(host, count_tmp);
}
//查看主机 map的数据分布情况
for (Map.Entry<String, Integer> entry : map.entrySet()) {
System.out.println("host = " + entry.getKey() + ", count = " + entry.getValue());
}
//计算标准差
System.out.println("标准差:" + hashTest.getStandardDeviation(map));
}
}
结果:
host = host_1, count = 13681
host = host_2, count = 13599
host = host_0, count = 728601
host = host_5, count = 13780
host = host_6, count = 13708
host = host_3, count = 11533
host = host_4, count = 13684
host = host_9, count = 164197
host = host_7, count = 13617
host = host_8, count = 13600
1000000
100000.0
14654.29507004687
标准差:14654.29507004687
pS:这个分布不好,需要再调整调整。。

用户头像

龙7

关注

还未添加个人签名 2019.02.12 加入

还未添加个人简介

评论

发布
暂无评论
Week5-作业