week5- 作业 一致性 hash 算法
发布于: 2020 年 07 月 08 日
作业
用你熟悉的编程语言实现一致性 hash 算法。
编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。
一致性hash是在分布式缓存集群中用来解决在服务器扩容时数据访问不一致问题,实现一致性hash算法先建立2^32 大的环,将服务器节点的若干个虚拟节点的hash值放在环上0-2^32-1,要计算的key值的hash值放在环上,沿着环顺时针查找离他最近的hash值服务器节点。
1、哈希环类 ConsistentHashRing
package cn.beinet;import java.util.ArrayList;import java.util.List;import java.util.Map;import java.util.TreeMap;/** * 一致性哈希环类 */public class ConsistentHashRing { TreeMap<Integer, Cache> map = new TreeMap<>(); List<Cache> servers = new ArrayList<>(); /** * 往环中加入服务器 * * @param servers 要加入的服务器列表 * @param virtualNumPerNode 每台服务器增加多少个虚拟节点,0表示不增加虚拟节点 */ public void addServers(String[] servers, int virtualNumPerNode) { if (servers != null) { for (String server : servers) { Cache cacheServer = new CacheServerMock(server); map.put(getHashCode(server), cacheServer); // 加虚拟节点 for (int i = 1; i <= virtualNumPerNode; i++) { map.put(getVirutalHashCode(server, i), cacheServer); } this.servers.add(cacheServer); } } } /** * 根据指定的缓存key,查找环上的对应服务器返回 * * @param cacheKey 缓存key * @return 服务器 */ public Cache getServer(String cacheKey) { if (map.size() <= 0) throw new IllegalArgumentException("you must add server before call this method."); if (cacheKey == null || cacheKey.isEmpty()) throw new IllegalArgumentException("cacheKey can't be empty"); int hash = getHashCode(cacheKey); Map.Entry<Integer, Cache> server = map.ceilingEntry(hash); if (server == null) return map.firstEntry().getValue(); return server.getValue(); } /** * 返回第i个虚拟结点的hash值,注意不能用随机值,以免重启导致变化。 * * @param key 服务器值 * @param i 第几个虚拟节点 * @return hash code */ private int getVirutalHashCode(String key, int i) { key = key + "&&Virtual" + i; return getHashCode(key); } public static int getHashCode(String key) { // int ret = key.hashCode(); // 结果太聚集,不使用 final int p = 16777619; int ret = (int) 2166136261L; for (int i = 0; i < key.length(); i++) { ret = (ret ^ key.charAt(i)) * p; } ret += ret << 13; ret ^= ret >> 7; ret += ret << 3; ret ^= ret >> 17; ret += ret << 5; return ret < 0 ? -ret : ret; } @Override public String toString() { int totalNum = 0; for (Cache cache : servers) { CacheServerMock mock = (CacheServerMock) cache; totalNum += mock.getCaches().size(); } int avg = totalNum / servers.size(); StringBuilder sb = new StringBuilder(); sb.append("总缓存个数:").append(totalNum) .append(" 服务器台数:").append(servers.size()) .append(" 每服务器虚拟节点数:").append(map.size()/servers.size() - 1) .append(" 平均值:").append(avg) .append("\r\n"); // 标准差公式: (每个样本 - 平均值)的平方 / (服务器数 - 1),结果再取平方根 long diff = 0; for (Cache cache : servers) { CacheServerMock mock = (CacheServerMock) cache; int size = mock.getCaches().size(); sb.append("Server-Name:") .append(mock.getServerName()) .append(" Cache-num: ") .append(size) .append("\r\n"); long tmp = size - avg; diff += (tmp * tmp); } diff /= (servers.size() - 1); sb.append("标准差为: ").append(Math.sqrt(diff)).append("\r\n"); return sb.toString(); }}
2、Cache接口:
package cn.beinet;public interface Cache { /** * 根据key读取缓存数据 * @param key key * @return 缓存 */ String get(String key); /** * 设置缓存 * @param key 缓存key * @param val 缓存值 */ void set(String key, String val);}
3,CacheImplement类:
package cn.beinet;public class CacheImplement implements Cache { private ConsistentHashRing ring = new ConsistentHashRing(); private String[] servers; public CacheImplement(String[] servers, int virtualNumPerNode) { ring.addServers(servers, virtualNumPerNode); } @Override public String get(String key) { Cache serverKey = ring.getServer(key); return serverKey.get(key); } @Override public void set(String key, String val) { Cache serverKey = ring.getServer(key); serverKey.set(key, val); } @Override public String toString() { return ring.toString(); }}
4,CacheServerMock类:
package cn.beinet;import java.util.HashMap;import java.util.Map;/** * 具体缓存服务器通讯类,这里用Mock类代替 */public class CacheServerMock implements Cache { private Map<String, String> caches = new HashMap<>(); private String serverName; public CacheServerMock(String serverName) { this.serverName = serverName; } public String getServerName() { return serverName; } public Map<String, String> getCaches() { return caches; } @Override public String get(String key) { return caches.get(key); } @Override public void set(String key, String val) { caches.put(key, val); }}
5,main函数测试代码:
package cn.beinet;public class Main { static String[] servers = new String[]{ "10.0.0.1", "10.0.0.2", "10.0.0.3", "10.0.0.4", "10.0.0.5", "10.0.0.6", "10.0.0.7", "10.0.0.8", "10.0.0.9", "10.0.0.10", }; static int maxCacheNum = 1000000; public static void main(String[] args) { testCache(0); testCache(1); testCache(10); testCache(100); } static void testCache(int virtualNumPerNode) { Cache cache = new CacheImplement(servers, virtualNumPerNode); fillCache(cache, maxCacheNum); System.out.println(cache); } static void fillCache(Cache cache, int cacheNum) { for (int i = 0; i < cacheNum; i++) { String key = "key" + i; //System.out.println(String.valueOf(i) + ":" + (ConsistentHashRing.getHashCode(key))); String val = "val" + key; cache.set(key, val); } }}
6、最终结果输出:
总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:0 平均值:100000Server-Name:10.0.0.1 Cache-num: 126281Server-Name:10.0.0.2 Cache-num: 6669Server-Name:10.0.0.3 Cache-num: 154480Server-Name:10.0.0.4 Cache-num: 138847Server-Name:10.0.0.5 Cache-num: 11541Server-Name:10.0.0.6 Cache-num: 357376Server-Name:10.0.0.7 Cache-num: 91493Server-Name:10.0.0.8 Cache-num: 13296Server-Name:10.0.0.9 Cache-num: 82488Server-Name:10.0.0.10 Cache-num: 17529标准差为: 106793.68220545632总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:1 平均值:100000Server-Name:10.0.0.1 Cache-num: 106183Server-Name:10.0.0.2 Cache-num: 96674Server-Name:10.0.0.3 Cache-num: 50094Server-Name:10.0.0.4 Cache-num: 168298Server-Name:10.0.0.5 Cache-num: 101609Server-Name:10.0.0.6 Cache-num: 209265Server-Name:10.0.0.7 Cache-num: 91493Server-Name:10.0.0.8 Cache-num: 25798Server-Name:10.0.0.9 Cache-num: 69784Server-Name:10.0.0.10 Cache-num: 80802标准差为: 53754.40262899403总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:10 平均值:100000Server-Name:10.0.0.1 Cache-num: 115236Server-Name:10.0.0.2 Cache-num: 86865Server-Name:10.0.0.3 Cache-num: 103043Server-Name:10.0.0.4 Cache-num: 114599Server-Name:10.0.0.5 Cache-num: 130974Server-Name:10.0.0.6 Cache-num: 77227Server-Name:10.0.0.7 Cache-num: 121106Server-Name:10.0.0.8 Cache-num: 84139Server-Name:10.0.0.9 Cache-num: 63553Server-Name:10.0.0.10 Cache-num: 103258标准差为: 21450.422699797782总缓存个数:1000000 服务器台数:10 每服务器虚拟节点数:100 平均值:100000Server-Name:10.0.0.1 Cache-num: 119498Server-Name:10.0.0.2 Cache-num: 91413Server-Name:10.0.0.3 Cache-num: 107542Server-Name:10.0.0.4 Cache-num: 90998Server-Name:10.0.0.5 Cache-num: 107033Server-Name:10.0.0.6 Cache-num: 96690Server-Name:10.0.0.7 Cache-num: 101308Server-Name:10.0.0.8 Cache-num: 89079Server-Name:10.0.0.9 Cache-num: 90149Server-Name:10.0.0.10 Cache-num: 106290标准差为: 10054.467962055476
参考:
https://github.com/youbl/study/tree/master/ConsistentHashProject
划线
评论
复制
发布于: 2020 年 07 月 08 日 阅读数: 36
a晖
关注
还未添加个人签名 2018.12.05 加入
还未添加个人简介
评论