第五周作业

用户头像
刘卓
关注
发布于: 2020 年 07 月 08 日
  • 用你熟悉的编程语言实现一致性 hash 算法。

class HashCircle
{
public $size = 2147483647;
//4294967296;

/**
* @var array
*/
public $nodes;

public $containers;

private $indexs;

public function addContainer($name, $virtual_count)
{
$this->containers[$name] = new Container(['name' => $name]);
$virtual_nodes = $this->makeVirtualNodes($name, $virtual_count);
foreach ($virtual_nodes as $virtual_node) {
//$index = $this->hash($virtual_node->index);
$this->nodes[] = ['index' => $virtual_node['index'],
'name' => $virtual_node['name']];
$this->indexs[] = $virtual_node['index'];
}
array_multisort($this->indexs, $this->nodes);
}

private function makeVirtualNodes($name, $virtual_count)
{
$nodes = [];
for ($i = 0; $i < $virtual_count; $i++) {
$nodes[] = [
'name' => $name,
'index' => $this->hash('')];
}
return $nodes;
}
private function findContainer($index)
{
if (empty($this->containers) || empty($this->nodes))
return null;
$low = 0;
$high = count($this->nodes) - 1;

$times = 1;
while ($low <= $high) {
// debug
if ($times > count($this->nodes) / 2) {
echo $low . ':' . $this->nodes[$low]['index'] . '--' . $high . ':' . $this->nodes[$high]['index'] . '--' . $index . '-' . $times . "\r\n";
break;
}
$times++;

$mid = intval(($high + $low) / 2);
// 值等于当前点,则判定为放置于当前点
if ($index == $this->nodes[$mid]['index']) {
return $this->containers[$this->nodes[$mid]['name']];
} //最后一个点
elseif ($mid >= $high)
break;
// 值小于等于下一个点,大于当前点,则判定为放置于下一个点
elseif ($this->nodes[$mid]['index'] < $index &&
$index <= $this->nodes[$mid + 1]['index']) {
return $this->containers[$this->nodes[$mid + 1]['name']];
} // 值大于下一个点,后半部分移动
elseif ($index > $this->nodes[$mid + 1]['index']) {
$low = $mid + 1;
} // 值小于当前点,前半部分移动
elseif ($index < $this->nodes[$mid]['index']) {
$high = $mid;
} else {
// debug
echo $low . ':' . $this->nodes[$low]['index'] . '--' . $high . ':' . $this->nodes[$high]['index'] . '--' . ($mid + 1) . ':' . $this->nodes[$mid + 1]['index'] . '--' . $index . '-' . $times . "\r\n";
throw new \Exception('???' . $index);
}

}
//一圈没找到则判定放置于第1个点
return $this->containers[$this->nodes[0]['name']];
}

public function addObject($key, $value)
{
$index = $this->hash($key);
$this->findContainer($index)->nodes[] = [$index => $value];
}

function hash($s, $seed = 1)
{
// mt_srand($seed);
// fake hash
return mt_rand(1, $this->size);

$len = $this->size;
$float = sprintf('%u', crc32($s));
return intval(fmod($float, $len));
}
}
  • 编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。

尝试:代码通过不断缩小最小标准差对应虚拟点个数的范围来尝试找到让标准差最小的虚拟点个数。

问题:kv 10万的情况下似乎结果浮动较大,几次测试大概最佳位置在 7w到10w 之间

class HashController
{
public function actionRun($server_count = 10, $virtual_count = 61000, $kv_count = 1000000)
{
$poll = new HashCircle();
for ($i = 0; $i < $server_count; $i++) {
$poll->addContainer('server' . $i, $virtual_count);
}
for ($i = 0; $i < $kv_count; $i++) {
$poll->addObject($i, $i);
}
$container_nodes_conuts = [];
foreach ($poll->containers as $container) {
$count = count($container->nodes);
// echo $container->name . ': ' . $count . "\r\n";
$container_nodes_conuts[] = $count;
}
$variance = $this::getVariance($container_nodes_conuts, false);
echo 'server count: ' . $server_count . ' virtual count: ' . $virtual_count . ' kv count: ' . $kv_count . ' variance:' . $variance . "\r\n";
unset($poll->containers);
unset($poll->nodes);
unset($poll);
return $variance;
}
public $mean = 10;
public $max_times = 5;
public $blocks = 10;
public $kv_count = 100000;
public $server_count = 10;
public $low = 0;
public $high = 100000;
private $_total_times = 0;
private $_pre_min_index;
public function actionBulkRun()
{
echo '----from ' . $this->low . ' to ' . $this->high . "\r\n";
$this->_total_times++;
$virtual_count_step = intval(($this->high - $this->low) / $this->blocks);
$result = [];
for ($i = 0; $i < $this->blocks; $i++) {
$this->low = $this->low + $virtual_count_step;
$variance = $this->actionRun($this->server_count, $this->low, $this->kv_count);
$result[$this->low] = $variance;
}
$min_variance = min($result);
$min = array_search($min_variance, $result);
// 与前一次最小标准差位置重复
if ($this->_pre_min_index == $min) {
echo '1; min variance:' . $min_variance . '--virtual count:' . $this->low . "\r\n";
return;
}
$this->_pre_min_index = $min;
// 最高位最低位
if ($min <= 0 || $min >= $this->kv_count) {
echo '2; min variance:' . $min_variance . '--virtual count:' . $this->low . "\r\n";
return;
}
// 寻找了最大次数或达到期望
if ($min_variance < $this->mean) {
echo '3; min variance:' . $min_variance . '--virtual count:' . $this->low . "\r\n";
return;
}
if ($this->_total_times > $this->max_times) {
echo '4; min variance:' . $min_variance . '--virtual count:' . $this->low . "\r\n";
return;
}
$this->low = $min - $virtual_count_step;
$this->high = $min + $virtual_count_step;
$this->actionBulkRun();
}
public static function getVariance($list, $isSwatch = FALSE)
{
$count = count($list);
if ($count == 1 && $isSwatch == TRUE) {
return false;
} elseif ($count > 0) {
$avg = self::average($list);
$total_var = 0;
foreach ($list as $lv)
$total_var += pow(($lv - $avg), 2);
if ($count == 1 && $isSwatch == TRUE)
return false;
return $isSwatch ? sqrt($total_var / (count($list) - 1)) : sqrt($total_var / count($list));
} else
return false;
}
public static function average($data)
{
$sum = 0;
$count = count($data);
for ($i = 0; $i < $count; $i++) {
$sum += $data[$i];
}
$avg = $sum / $count;
return $avg;
}
}



执行 actionRun 测试指定参数的标准差结果,执行 actionBulkRun 尝试从规定参数范围内寻找最优虚拟点数量。

以下为执行 actionBulkRun,在给定的参数下寻找最优虚拟点数量。

参数:

kv 数量=十万,服务器数量=10,最大环上点的数量=2147483647

执行结果:

----from 0 to 100000

server count: 10 virtual count: 10000 kv count: 100000 variance:170.15581094985

server count: 10 virtual count: 20000 kv count: 100000 variance:150.7594109832

server count: 10 virtual count: 30000 kv count: 100000 variance:129.29578492743

server count: 10 virtual count: 40000 kv count: 100000 variance:116.81866289254

server count: 10 virtual count: 50000 kv count: 100000 variance:120.6399602122

server count: 10 virtual count: 60000 kv count: 100000 variance:96.542218743926

server count: 10 virtual count: 70000 kv count: 100000 variance:99.465571933207

server count: 10 virtual count: 80000 kv count: 100000 variance:133.26289806244

server count: 10 virtual count: 90000 kv count: 100000 variance:71.665891468676

server count: 10 virtual count: 100000 kv count: 100000 variance:116.40790351175

----from 80000 to 100000

server count: 10 virtual count: 82000 kv count: 100000 variance:88.187300673056

server count: 10 virtual count: 84000 kv count: 100000 variance:71.527617044048

server count: 10 virtual count: 86000 kv count: 100000 variance:81.019750678461

server count: 10 virtual count: 88000 kv count: 100000 variance:96.768796623705

server count: 10 virtual count: 90000 kv count: 100000 variance:119.17382262896

server count: 10 virtual count: 92000 kv count: 100000 variance:131.49980988579

server count: 10 virtual count: 94000 kv count: 100000 variance:85.06585684045

server count: 10 virtual count: 96000 kv count: 100000 variance:71.299368861162

server count: 10 virtual count: 98000 kv count: 100000 variance:85.998837201441

server count: 10 virtual count: 100000 kv count: 100000 variance:130.62388755507

----from 94000 to 98000

server count: 10 virtual count: 94400 kv count: 100000 variance:64.983074719499

server count: 10 virtual count: 94800 kv count: 100000 variance:54.170102455137

server count: 10 virtual count: 95200 kv count: 100000 variance:93.645074616875

server count: 10 virtual count: 95600 kv count: 100000 variance:54.722938517591

server count: 10 virtual count: 96000 kv count: 100000 variance:124.79022397608

server count: 10 virtual count: 96400 kv count: 100000 variance:120.1940098341

server count: 10 virtual count: 96800 kv count: 100000 variance:92.134684022902

server count: 10 virtual count: 97200 kv count: 100000 variance:56.833088953531

server count: 10 virtual count: 97600 kv count: 100000 variance:67.505555326951

server count: 10 virtual count: 98000 kv count: 100000 variance:97.427922075758

----from 94400 to 95200

server count: 10 virtual count: 94480 kv count: 100000 variance:90.255193756371

server count: 10 virtual count: 94560 kv count: 100000 variance:144.15339052551

server count: 10 virtual count: 94640 kv count: 100000 variance:104.98190320241

server count: 10 virtual count: 94720 kv count: 100000 variance:76.546717761116

server count: 10 virtual count: 94800 kv count: 100000 variance:89.641508242555

server count: 10 virtual count: 94880 kv count: 100000 variance:129.83065893694

server count: 10 virtual count: 94960 kv count: 100000 variance:94.534649732254

server count: 10 virtual count: 95040 kv count: 100000 variance:103.23371542282

server count: 10 virtual count: 95120 kv count: 100000 variance:78.534069040131

server count: 10 virtual count: 95200 kv count: 100000 variance:100.5067161935

----from 94640 to 94800

server count: 10 virtual count: 94656 kv count: 100000 variance:107.72372069326

server count: 10 virtual count: 94672 kv count: 100000 variance:55.865910893854

server count: 10 virtual count: 94688 kv count: 100000 variance:104.45190280699

server count: 10 virtual count: 94704 kv count: 100000 variance:92.666067144344

server count: 10 virtual count: 94720 kv count: 100000 variance:105.86689756482

server count: 10 virtual count: 94736 kv count: 100000 variance:76.253524508707

server count: 10 virtual count: 94752 kv count: 100000 variance:101.2116594074

server count: 10 virtual count: 94768 kv count: 100000 variance:153.87007506335

server count: 10 virtual count: 94784 kv count: 100000 variance:77.402842325072

server count: 10 virtual count: 94800 kv count: 100000 variance:102.7959143157

----from 94656 to 94688

server count: 10 virtual count: 94659 kv count: 100000 variance:136.20646093339

server count: 10 virtual count: 94662 kv count: 100000 variance:106.85504199615

server count: 10 virtual count: 94665 kv count: 100000 variance:76.384553412323

server count: 10 virtual count: 94668 kv count: 100000 variance:125.72191535289

server count: 10 virtual count: 94671 kv count: 100000 variance:74.178163902863

server count: 10 virtual count: 94674 kv count: 100000 variance:89.352112454043

server count: 10 virtual count: 94677 kv count: 100000 variance:91.466933916033

server count: 10 virtual count: 94680 kv count: 100000 variance:59.391918642186

server count: 10 virtual count: 94683 kv count: 100000 variance:93.226605644526

server count: 10 virtual count: 94686 kv count: 100000 variance:97.156574661728

4; min variance:59.391918642186--virtual count:94686



用户头像

刘卓

关注

还未添加个人签名 2018.04.26 加入

还未添加个人简介

评论

发布
暂无评论
第五周作业