week5- 作业 一致性 hash 算法

用户头像
a晖
关注
发布于: 2020 年 07 月 08 日

作业

  • 用你熟悉的编程语言实现一致性 hash 算法。

  • 编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。



一致性hash是在分布式缓存集群中用来解决在服务器扩容时数据访问不一致问题,实现一致性hash算法先建立2^32 大的环,将服务器节点的若干个虚拟节点的hash值放在环上0-2^32-1,要计算的key值的hash值放在环上,沿着环顺时针查找离他最近的hash值服务器节点。



1、哈希环类 ConsistentHashRing



package cn.beinet;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
/**
* 一致性哈希环类
*/
public class ConsistentHashRing {
TreeMap<Integer, Cache> map = new TreeMap<>();
List<Cache> servers = new ArrayList<>();
/**
* 往环中加入服务器
*
* @param servers 要加入的服务器列表
* @param virtualNumPerNode 每台服务器增加多少个虚拟节点,0表示不增加虚拟节点
*/
public void addServers(String[] servers, int virtualNumPerNode) {
if (servers != null) {
for (String server : servers) {
Cache cacheServer = new CacheServerMock(server);
map.put(getHashCode(server), cacheServer);
// 加虚拟节点
for (int i = 1; i <= virtualNumPerNode; i++) {
map.put(getVirutalHashCode(server, i), cacheServer);
}
this.servers.add(cacheServer);
}
}
}
/**
* 根据指定的缓存key,查找环上的对应服务器返回
*
* @param cacheKey 缓存key
* @return 服务器
*/
public Cache getServer(String cacheKey) {
if (map.size() <= 0)
throw new IllegalArgumentException("you must add server before call this method.");
if (cacheKey == null || cacheKey.isEmpty())
throw new IllegalArgumentException("cacheKey can't be empty");
int hash = getHashCode(cacheKey);
Map.Entry<Integer, Cache> server = map.ceilingEntry(hash);
if (server == null)
return map.firstEntry().getValue();
return server.getValue();
}
/**
* 返回第i个虚拟结点的hash值,注意不能用随机值,以免重启导致变化。
*
* @param key 服务器值
* @param i 第几个虚拟节点
* @return hash code
*/
private int getVirutalHashCode(String key, int i) {
key = key + "&&Virtual" + i;
return getHashCode(key);
}
public static int getHashCode(String key) {
// int ret = key.hashCode(); // 结果太聚集,不使用
final int p = 16777619;
int ret = (int) 2166136261L;
for (int i = 0; i < key.length(); i++) {
ret = (ret ^ key.charAt(i)) * p;
}
ret += ret << 13;
ret ^= ret >> 7;
ret += ret << 3;
ret ^= ret >> 17;
ret += ret << 5;
return ret < 0 ? -ret : ret;
}
@Override
public String toString() {
int totalNum = 0;
for (Cache cache : servers) {
CacheServerMock mock = (CacheServerMock) cache;
totalNum += mock.getCaches().size();
}
int avg = totalNum / servers.size();
StringBuilder sb = new StringBuilder();
sb.append("总缓存个数:").append(totalNum)
.append(" 服务器台数:").append(servers.size())
.append(" 每服务器虚拟节点数:").append(map.size()/servers.size() - 1)
.append(" 平均值:").append(avg)
.append("\r\n");
// 标准差公式: (每个样本 - 平均值)的平方 / (服务器数 - 1),结果再取平方根
long diff = 0;
for (Cache cache : servers) {
CacheServerMock mock = (CacheServerMock) cache;
int size = mock.getCaches().size();
sb.append("Server-Name:")
.append(mock.getServerName())
.append(" Cache-num: ")
.append(size)
.append("\r\n");
long tmp = size - avg;
diff += (tmp * tmp);
}
diff /= (servers.size() - 1);
sb.append("标准差为: ").append(Math.sqrt(diff)).append("\r\n");
return sb.toString();
}
}



2、Cache接口:

package cn.beinet;
public interface Cache {
/**
* 根据key读取缓存数据
* @param key key
* @return 缓存
*/
String get(String key);
/**
* 设置缓存
* @param key 缓存key
* @param val 缓存值
*/
void set(String key, String val);
}



3,CacheImplement类:

package cn.beinet;
public class CacheImplement implements Cache {
private ConsistentHashRing ring = new ConsistentHashRing();
private String[] servers;
public CacheImplement(String[] servers, int virtualNumPerNode) {
ring.addServers(servers, virtualNumPerNode);
}
@Override
public String get(String key) {
Cache serverKey = ring.getServer(key);
return serverKey.get(key);
}
@Override
public void set(String key, String val) {
Cache serverKey = ring.getServer(key);
serverKey.set(key, val);
}
@Override
public String toString() {
return ring.toString();
}
}



4,CacheServerMock类:

package cn.beinet;
import java.util.HashMap;
import java.util.Map;
/**
* 具体缓存服务器通讯类,这里用Mock类代替
*/
public class CacheServerMock implements Cache {
private Map<String, String> caches = new HashMap<>();
private String serverName;
public CacheServerMock(String serverName) {
this.serverName = serverName;
}
public String getServerName() {
return serverName;
}
public Map<String, String> getCaches() {
return caches;
}
@Override
public String get(String key) {
return caches.get(key);
}
@Override
public void set(String key, String val) {
caches.put(key, val);
}
}



5,main函数测试代码:

package cn.beinet;
public class Main {
static String[] servers = new String[]{
"10.0.0.1",
"10.0.0.2",
"10.0.0.3",
"10.0.0.4",
"10.0.0.5",
"10.0.0.6",
"10.0.0.7",
"10.0.0.8",
"10.0.0.9",
"10.0.0.10",
};
static int maxCacheNum = 1000000;
public static void main(String[] args) {
testCache(0);
testCache(1);
testCache(10);
testCache(100);
}
static void testCache(int virtualNumPerNode) {
Cache cache = new CacheImplement(servers, virtualNumPerNode);
fillCache(cache, maxCacheNum);
System.out.println(cache);
}
static void fillCache(Cache cache, int cacheNum) {
for (int i = 0; i < cacheNum; i++) {
String key = "key" + i;
//System.out.println(String.valueOf(i) + ":" + (ConsistentHashRing.getHashCode(key)));
String val = "val" + key;
cache.set(key, val);
}
}
}

6、最终结果输出:

总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:0 平均值:100000
Server-Name:10.0.0.1 Cache-num: 126281
Server-Name:10.0.0.2 Cache-num: 6669
Server-Name:10.0.0.3 Cache-num: 154480
Server-Name:10.0.0.4 Cache-num: 138847
Server-Name:10.0.0.5 Cache-num: 11541
Server-Name:10.0.0.6 Cache-num: 357376
Server-Name:10.0.0.7 Cache-num: 91493
Server-Name:10.0.0.8 Cache-num: 13296
Server-Name:10.0.0.9 Cache-num: 82488
Server-Name:10.0.0.10 Cache-num: 17529
标准差为: 106793.68220545632

总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:1 平均值:100000
Server-Name:10.0.0.1 Cache-num: 106183
Server-Name:10.0.0.2 Cache-num: 96674
Server-Name:10.0.0.3 Cache-num: 50094
Server-Name:10.0.0.4 Cache-num: 168298
Server-Name:10.0.0.5 Cache-num: 101609
Server-Name:10.0.0.6 Cache-num: 209265
Server-Name:10.0.0.7 Cache-num: 91493
Server-Name:10.0.0.8 Cache-num: 25798
Server-Name:10.0.0.9 Cache-num: 69784
Server-Name:10.0.0.10 Cache-num: 80802
标准差为: 53754.40262899403

总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:10 平均值:100000
Server-Name:10.0.0.1 Cache-num: 115236
Server-Name:10.0.0.2 Cache-num: 86865
Server-Name:10.0.0.3 Cache-num: 103043
Server-Name:10.0.0.4 Cache-num: 114599
Server-Name:10.0.0.5 Cache-num: 130974
Server-Name:10.0.0.6 Cache-num: 77227
Server-Name:10.0.0.7 Cache-num: 121106
Server-Name:10.0.0.8 Cache-num: 84139
Server-Name:10.0.0.9 Cache-num: 63553
Server-Name:10.0.0.10 Cache-num: 103258
标准差为: 21450.422699797782

总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:100 平均值:100000
Server-Name:10.0.0.1 Cache-num: 119498
Server-Name:10.0.0.2 Cache-num: 91413
Server-Name:10.0.0.3 Cache-num: 107542
Server-Name:10.0.0.4 Cache-num: 90998
Server-Name:10.0.0.5 Cache-num: 107033
Server-Name:10.0.0.6 Cache-num: 96690
Server-Name:10.0.0.7 Cache-num: 101308
Server-Name:10.0.0.8 Cache-num: 89079
Server-Name:10.0.0.9 Cache-num: 90149
Server-Name:10.0.0.10 Cache-num: 106290
标准差为: 10054.467962055476



参考:

https://github.com/youbl/study/tree/master/ConsistentHashProject

用户头像

a晖

关注

还未添加个人签名 2018.12.05 加入

还未添加个人简介

评论

发布
暂无评论
week5-作业  一致性 hash 算法