架构师训练营第五周作业

 关注
发布于: 2020 年 07 月 07 日
一致性哈希类
public class KetamaNodeLocator
    {
        private SortedList<long, string> ketamaNodes = new SortedList<long, string>();
        
        public KetamaNodeLocator(List<string> nodes, int nodeCopies)
        {
            ketamaNodes = new SortedList<long, string>();
            
            //对所有节点，生成nCopies个虚拟结点
            foreach (string node in nodes)
            {
                //每四个虚拟结点为一组
                for (int i = 0; i < nodeCopies / 4; i++)
                {
                    //getKeyForNode方法为这组虚拟结点得到惟一名称 
                    byte[] digest = HashAlgorithm.computeMd5(node + i);
                    /** Md5是一个16字节长度的数组，将16字节的数组每四个字节一组，分别对应一个虚拟结点，这就是为什么上面把虚拟结点四个划分一组的原因*/
                    for (int h = 0; h < 4; h++)
                    {
                        long m = HashAlgorithm.hash(digest, h);
                        ketamaNodes[m] = node;
                    }
                }
            }
        }
        public string GetPrimary(string k)
        {
            byte[] digest = HashAlgorithm.computeMd5(k);
            string rv = GetNodeForKey(HashAlgorithm.hash(digest, 0));
            return rv;
        }
        string GetNodeForKey(long hash)
        {
            string rv;
            long key = hash;
            //如果找到这个节点，直接取节点，返回   
            if (!ketamaNodes.ContainsKey(key))
            {
                var tailMap = from coll in ketamaNodes
                              where coll.Key > hash
                              select new { coll.Key };
                if (tailMap == null || tailMap.Count() == 0)
                    key = ketamaNodes.FirstOrDefault().Key;
                else
                    key = tailMap.FirstOrDefault().Key;
            }
            rv = ketamaNodes[key];
            return rv;
        }
    }
    public class HashAlgorithm
    {
        public static long hash(byte[] digest, int nTime)
        {
            long rv = ((long)(digest[3 + nTime * 4] & 0xFF) << 24)
                    | ((long)(digest[2 + nTime * 4] & 0xFF) << 16)
                    | ((long)(digest[1 + nTime * 4] & 0xFF) << 8)
                    | ((long)digest[0 + nTime * 4] & 0xFF);
            return rv & 0xffffffffL; /* Truncate to 32-bits */
        }
        
        /**
         * Get the md5 of the given key.
         */
        public static byte[] computeMd5(string k)
        {
            MD5 md5 = new MD5CryptoServiceProvider();
            byte[] keyBytes = md5.ComputeHash(Encoding.UTF8.GetBytes(k));
            md5.Clear();
            //md5.update(keyBytes);
            //return md5.digest();
            return keyBytes;
        }
    }
﻿
调用，使用10节点，1百万键值，分别尝试计算每个节点配置不同数量的虚拟节点来比较哈希分布的均匀性。标准差越大表示各个节点的键值越不均衡，反之越均衡。
﻿
class Program
    {
        static void Main(string[] args)
        {
            var totalCount = 1000000;
            var keys = new List<string>(totalCount);
            for (int i = 0; i < totalCount; i++)
            {
                keys.Add(Guid.NewGuid().ToString());
            }
            Run(10, 4, keys);
            Run(10, 50, keys);
            Run(10, 160, keys);
            Run(10, 200, keys);
            Run(10, 400, keys);
            Run(10, 1000, keys);
            //var stdDev = CalculateStdDev(new List<long>() { 1, 2, 100, 1 });
            //Console.WriteLine($"标准差:" + stdDev);
            Console.ReadKey();
        }
        private static void Run(int nodeCount, int nodeCopies, List<string> keys)
        {
            var nodes = new Dictionary<string, long>(nodeCount);
            for (int i = 0; i <= nodeCount - 1; i++)
            {
                nodes.Add($"node{i}", 0);
            }
            Console.WriteLine($"共{nodes.Keys.Count}个节点, 每节点虚拟节点数: {nodeCopies}, 总KV数: {keys.Count}");
            var ketamaNodeLocator = new KetamaNodeLocator(nodes.Keys.ToList(), nodeCopies);
            for (int i = 0; i < keys.Count; i++)
            {
                var node = ketamaNodeLocator.GetPrimary(keys[i]);
                nodes[node]++;
            }
            foreach (var node in nodes)
            {
                Console.WriteLine($"节点{node.Key}: {node.Value}, 占比: {Math.Round(decimal.Parse((node.Value * 1.0 / keys.Count * 1.0).ToString("0.000")) * 100, 2)}%");
            }
            Console.WriteLine($"总计: {nodes.Sum(x => x.Value)}");
            var stdDev = CalculateStdDev(nodes.Values);
            Console.WriteLine($"标准差:" + stdDev);
            Console.WriteLine("=============================");
        }
        private static double CalculateStdDev(IEnumerable<long> values)
        {
            double ret = 0;
            if (values.Count() > 0)
            {
                //  计算平bai均数   
                double avg = values.Average();
                //  计算各数值与平均数的差值的平方，然后求和 
                double sum = values.Sum(d => Math.Pow(d - avg, 2));
                //  除以数量，然后开方
                ret = Math.Sqrt(sum / values.Count());
            }
            return ret;
        }
    }
﻿
输出如下，得出结论：使用很少或者不使用虚拟节点会导致严重的分布不均匀。感觉标准差有点跳跃？不是很明白。。
﻿
发布于: 2020 年 07 月 07 日阅读数: 28
一剑

 关注
还未添加个人签名 2017.11.23 加入
还未添加个人简介
发布
暂无评论