写点什么

架构作业:一致性 hash

用户头像
Nick~毓
关注
发布于: 2020 年 10 月 22 日



  1. 用你熟悉的编程语言实现一致性 hash 算法。

  2. 编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。






<?php

class ConsisterHash
{
protected $serverList = []; //服务器列表
protected $vistualNode = []; //虚拟节点
public $vistualNodeCount = 150; //虚拟节点数 默认150


//增加实际服务器节点
public function addNodes(array $nodes)
{
foreach ($nodes as $node) {
$this->addNode($node);
}
}
//增加实际服务器节点
private function addNode($server)
{
if (!isset($this->serverList[$server])) {

$this->addVistualNode($server); //增加虚拟节点

ksort($this->vistualNode, SORT_NUMERIC);

}

return true;
}

public function getVistualNode()
{
return $this->vistualNode;
}

public function lookup($key)
{
$point = $this->hash(($key));

$finalServer = current($this->vistualNode);

foreach ($this->vistualNode as $k => $server) {

if ($k >= $point) {
$finalServer = $server;
break;
}
}

reset($this->vistualNode);

return $finalServer;

}

//删除实际服务节点
public function delNode($node)
{
if (isset($this->serverList[$node])) {

//删除虚拟节点
foreach ($this->serverList as $k => $v) {
unset($this->vistualNode[$v]);
}

//删除实际节点
unset($this->serverList[$node]);

}

return true;
}


/**
* @param $string
* @return string
* 字符串转化成整形数字2^32;
*/
public function hash($string)
{
return sprintf("%u", crc32(md5($string)));
}

/**
* time33 函数
* @param string $str
* @return 32位正整数
* hash(i)=33*hash(i-1)+str[i]
*/
public function time33($str)
{
$hash = 0;
$s = md5($str); //相比其它版本,进行了md5加密
$seed = 5;
$len = 32;//加密后长度32
for ($i = 0; $i < $len; $i++) {

$hash = ($hash << $seed) + $hash + ord($s{$i});
}
return $hash & 0x7FFFFFFF;
}

/**
* @param $server
*/
private function addVistualNode($server)
{
for ($i = 0; $i < $this->vistualNodeCount; $i++) {

$vistualHashKey = $this->hash($server . "-" . $i);
$this->vistualNode[$vistualHashKey] = $server;
$this->serverList[$server][] = $vistualHashKey;//用作删除

}
}

/**
* @param $length
* @return string
* 随机数
*/
function randomKeys($length)
{
$pattern = '1234567890abcdefghijklmnopqrstuvwxyz
ABCDEFGHIJKLOMNOPQRSTUVWXYZ';

$key = "";
for ($i = 0; $i < $length; $i++) {
$key .= $pattern{mt_rand(0, 35)}; //生成php随机数
}
return $key;
}

/**
* @param $arr
* @return float
* 计算标准差
*/
public function standDeviation($arr)
{
$num_of_elements = count($arr);

$variance = 0.0;

$average = array_sum($arr) / $num_of_elements;

foreach ($arr as $i) {
$variance += pow(($i - $average), 2);
}

return (float)sqrt($variance / $num_of_elements);
}

}

$con = new ConsisterHash();

$nodeServerList = [
"127.0.0.0:8080",
"127.1.1.1:8080",
"127.2.2.2:8080",
"127.3.3.3:8080",
"127.4.4.4:8080",
"127.5.5.5:8080",
"127.6.6.6:8080",
"127.7.7.7:8080",
"127.8.8.8:8080",
"127.9.9.9:8080",
];

$con->addNodes($nodeServerList);

$nodeServerList_hit = [
"127.0.0.0:8080" => 0,
"127.1.1.1:8080" => 0,
"127.2.2.2:8080" => 0,
"127.3.3.3:8080" => 0,
"127.4.4.4:8080" => 0,
"127.5.5.5:8080" => 0,
"127.6.6.6:8080" => 0,
"127.7.7.7:8080" => 0,
"127.8.8.8:8080" => 0,
"127.9.9.9:8080" => 0,
];

for ($i = 0; $i < 1000000; $i++) {

$node = $con->lookup($con->randomKeys(6));

$nodeServerList_hit[$node]++;

}

print "总计数:" . $i . "\n";

print "虚拟节点设置为:" . $con->vistualNodeCount . "\n";

foreach ($nodeServerList_hit as $k => $v) {
print $k . ' 存储key数: ' . $v . "\n";
}

print "标准差: " . $con->standDeviation($nodeServerList_hit) . "\n";




跑出的数据:

总计数:1000000

虚拟节点设置为:150

127.0.0.0:8080 存储key数: 112196

127.1.1.1:8080 存储key数: 104381

127.2.2.2:8080 存储key数: 92076

127.3.3.3:8080 存储key数: 91984

127.4.4.4:8080 存储key数: 98362

127.5.5.5:8080 存储key数: 97711

127.6.6.6:8080 存储key数: 105610

127.7.7.7:8080 存储key数: 95047

127.8.8.8:8080 存储key数: 97507

127.9.9.9:8080 存储key数: 105126

标准差: 6256.1921965362



总计数:1000000

虚拟节点设置为:100

127.0.0.0:8080 存储key数: 101162

127.1.1.1:8080 存储key数: 93289

127.2.2.2:8080 存储key数: 90498

127.3.3.3:8080 存储key数: 101348

127.4.4.4:8080 存储key数: 103804

127.5.5.5:8080 存储key数: 97908

127.6.6.6:8080 存储key数: 100853

127.7.7.7:8080 存储key数: 93081

127.8.8.8:8080 存储key数: 112000

127.9.9.9:8080 存储key数: 106057

标准差: 6217.9351234956



总计数:1000000

虚拟节点设置为:200

127.0.0.0:8080 存储key数: 105619

127.1.1.1:8080 存储key数: 107050

127.2.2.2:8080 存储key数: 91256

127.3.3.3:8080 存储key数: 88815

127.4.4.4:8080 存储key数: 91780

127.5.5.5:8080 存储key数: 113026

127.6.6.6:8080 存储key数: 108216

127.7.7.7:8080 存储key数: 96278

127.8.8.8:8080 存储key数: 96140

127.9.9.9:8080 存储key数: 101820

标准差: 7871.7878401288



总体感觉:分布落在区间还是挺均衡的,差别不是很明显;

用户头像

Nick~毓

关注

还未添加个人签名 2018.05.09 加入

还未添加个人简介

评论

发布
暂无评论
架构作业:一致性hash