Week5- 作业

关注
发布于: 2020 年 07 月 08 日
﻿
用你熟悉的编程语言实现一致性 hash 算法。
编写测试用例测试这个算法，测试 100 万 KV 数据，10 个服务器节点的情况下，计算这些 KV 数据在服务器上分布数量的标准差，以评估算法的存储负载不均衡性。
﻿
package com.study.tmp;
﻿
import java.util.*;
﻿
﻿
﻿
class HashRel {
    //
    //服务器数量
    private int host_count;
    //虚拟节点数量
    private int virtual_count;
﻿
    SortedMap<Integer, String> circle_map;
﻿
    public HashRel(int host_count, int virtual_count) {
        circle_map = new TreeMap<Integer, String>();
        this.host_count = host_count;
        this.virtual_count = virtual_count;
    }
﻿
    //创建hash对应关系
    public void getRel() {
        //建立hashcode和主机的对应关系
        for (int i = 0; i < host_count; i++) {
            String host = "host" + "_" + String.valueOf(i);
            for (int j = 0; j < virtual_count; j++) {
                String host_virtual_name = host + "_" + String.valueOf(j);
                int hash = 0;
                System.out.println("host_virtual_name.hashcode:" + Math.abs(host_virtual_name.hashCode()));
﻿
                hash = Math.abs(host_virtual_name.hashCode());
﻿
                circle_map.put(hash, host);
            }
        }
    }
﻿
    //取真是节点
    public String getHost(String data) {
        int hash = data.hashCode();
        hash = Math.abs(hash);
﻿
        System.out.println("getHost hash:" + hash);
﻿
        if (circle_map.isEmpty()) {
            return null;
        }
﻿
        if (!circle_map.containsKey(hash)) {
            SortedMap<Integer, String> tailMap = circle_map.tailMap(hash);// 沿环的顺时针找到一个虚拟节点
            if (tailMap.isEmpty()) {
                hash = circle_map.firstKey();
                System.out.println("empty:" + hash);
            } else {
                hash = tailMap.firstKey();
                System.out.println("not empty:" + hash);
            }
        }
        System.out.println("目标主机：" + circle_map.get(hash));
        return circle_map.get(hash);
    }
﻿
    @Override
    public String toString() {
        String str = "";
﻿
        for (Map.Entry<Integer, String> entry : circle_map.entrySet()) {
            System.out.println("Key = " + entry.getKey() + ", Value = " + entry.getValue());
            str = str + "Key = " + entry.getKey() + ", Value = " + entry.getValue();
        }
        return str;
    }
}
﻿
﻿
public class HashTest {
    //求标准差
    public double getStandardDeviation(Map<String, Integer> map) {
        int sum = 0;
﻿
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            System.out.println("host = " + entry.getKey() + ", count = " + entry.getValue());
            sum += entry.getValue(); //求出数组的总和
        }
﻿
        System.out.println(sum);
        double average = sum / map.size();  //求出数组的平均数
        System.out.println(average);   //
        int total = 0;
﻿
        for (Map.Entry<String, Integer> entry2 : map.entrySet()) {
            total += (entry2.getValue() - average) * (entry2.getValue() - average);   //求出方差，如果要计算方差的话这一步就可以了
        }
﻿
        double standardDeviation = Math.sqrt(total / map.size());   //求出标准差
        System.out.println(standardDeviation);
﻿
        return standardDeviation;
    }
﻿
    public static void main(String[] args) {
        int host_count = 10;
        int virtual_count = 5000;
        int data_count = 1000000;
﻿
        HashTest hashTest = new HashTest();
        //每台机器真是记录数
        Map<String, Integer> map = new HashMap<String, Integer>();
﻿
        //建立服务器与哈希值的对应关系
        HashRel hashRel = new HashRel(host_count, virtual_count);
        hashRel.getRel();
        //hashRel.toString();
﻿
//        String data = UUID.randomUUID().toString();
//        hashRel.getHost(data);
﻿
        //初始化所有的机器上数据为0
        for (int i = 0; i < host_count; i++) {
            String host = "host" + "_" + String.valueOf(i);
            map.put(host, 0);
        }
﻿
        //遍历数据匹配对应主机
        for (int j = 0; j < data_count; j++) {
            String data = UUID.randomUUID().toString();
            String host = hashRel.getHost(data);
            // 每台真实机器节点上保存的记录条数加1
            int count_tmp = map.get(host) + 1;
            map.put(host, count_tmp);
        }
﻿
        //查看主机 map的数据分布情况
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            System.out.println("host = " + entry.getKey() + ", count = " + entry.getValue());
        }
﻿
        //计算标准差
        System.out.println("标准差：" + hashTest.getStandardDeviation(map));
    }
﻿
}
﻿
﻿
结果：
host = host_1, count = 13681
host = host_2, count = 13599
host = host_0, count = 728601
host = host_5, count = 13780
host = host_6, count = 13708
host = host_3, count = 11533
host = host_4, count = 13684
host = host_9, count = 164197
host = host_7, count = 13617
host = host_8, count = 13600
1000000
100000.0
14654.29507004687
标准差：14654.29507004687
pS：这个分布不好，需要再调整调整。。
﻿
发布于: 2020 年 07 月 08 日阅读数: 41
龙7

关注
还未添加个人签名 2019.02.12 加入
还未添加个人简介
发布
暂无评论
创作场景

Week5- 作业

龙7

评论