架构师训练营 - 第五周 - 作业

用户头像
Anrika
关注
发布于: 2020 年 07 月 08 日

用你熟悉的编程语言实现一致性 hash 算法。

编写测试用例测试这个算法：在 100 万条 KV 数据、10 个服务器节点的情况下，计算这些 KV 数据在各服务器上分布数量的标准差，以评估算法的存储负载不均衡性。

示例代码

/**
 * Consistent-hashing ring that maps resources onto a set of targets (nodes).
 *
 * Every target is hashed onto several "virtual" positions on the ring. A
 * resource is served by the first target whose position is greater than the
 * resource's own hash, wrapping around to the start of the ring if needed.
 */
class Flexihash
{
    /**
     * Number of virtual positions created per target. More positions smooth
     * out an uneven distribution at the cost of a larger ring to sort and scan.
     *
     * @var int
     */
    private $_replicas = 64;

    /**
     * Hash strategy used for both targets and resources (e.g. CRC32 or MD5).
     *
     * @var object Flexihash_Hasher
     */
    private $_hasher;

    /**
     * Number of targets currently registered.
     *
     * @var int
     */
    private $_targetCount = 0;

    /**
     * Ring position => target; consulted by lookups.
     *
     * @var array { position => target, ... }
     */
    private $_positionToTarget = array();

    /**
     * Target => list of its ring positions; consulted when removing a target.
     *
     * @var array { target => [ position, position, ... ], ... }
     */
    private $_targetToPositions = array();

    /**
     * Whether $_positionToTarget is currently sorted by position.
     *
     * @var boolean
     */
    private $_positionToTargetSorted = false;

    /**
     * Constructor.
     *
     * @param object $hasher   Flexihash_Hasher; defaults to Flexihash_Crc32Hasher.
     * @param int    $replicas Amount of positions to hash each target to;
     *                         empty values keep the default of 64.
     */
    public function __construct(Flexihash_Hasher $hasher = null, $replicas = null)
    {
        $this->_hasher = $hasher ? $hasher : new Flexihash_Crc32Hasher();

        if (!empty($replicas)) {
            $this->_replicas = $replicas;
        }
    }

    /**
     * Add a target, spreading it over `$_replicas` virtual ring positions.
     *
     * @param string $target
     * @return Flexihash $this, for chaining.
     * @throws Flexihash_Exception If the target is already registered.
     */
    public function addTarget($target)
    {
        if (isset($this->_targetToPositions[$target])) {
            throw new Flexihash_Exception("Target '$target' already exists.");
        }

        $this->_targetToPositions[$target] = array();

        // Hash the target into multiple positions by suffixing the replica index.
        for ($i = 0; $i < $this->_replicas; $i++) {
            $position = $this->_hasher->hash($target . $i);
            $this->_positionToTarget[$position] = $target;     // used by lookup
            $this->_targetToPositions[$target][] = $position;  // used by removal
        }

        // New positions were appended, so the ring must be re-sorted lazily.
        $this->_positionToTargetSorted = false;
        $this->_targetCount++;

        return $this;
    }

    /**
     * Add several targets at once.
     *
     * @param array $targets List of target names.
     * @return Flexihash $this, for chaining.
     * @throws Flexihash_Exception If any target is already registered.
     */
    public function addTargets($targets)
    {
        foreach ($targets as $target) {
            $this->addTarget($target);
        }

        return $this;
    }

    /**
     * Remove a target and release the ring positions it owns.
     *
     * @param string $target
     * @return Flexihash $this, for chaining.
     * @throws Flexihash_Exception If the target is not registered.
     */
    public function removeTarget($target)
    {
        if (!isset($this->_targetToPositions[$target])) {
            throw new Flexihash_Exception("Target '$target' does not exist.");
        }

        foreach ($this->_targetToPositions[$target] as $position) {
            // A hash collision may have handed this position to a target that
            // was added later; only release positions this target still owns.
            // Compared as strings so removeTarget(1) still matches addTarget("1").
            if (isset($this->_positionToTarget[$position])
                && (string) $this->_positionToTarget[$position] === (string) $target
            ) {
                unset($this->_positionToTarget[$position]);
            }
        }

        unset($this->_targetToPositions[$target]);

        $this->_targetCount--;

        return $this;
    }

    /**
     * List the registered targets.
     *
     * @return array Target names (array keys of the target map).
     */
    public function getAllTargets()
    {
        return array_keys($this->_targetToPositions);
    }

    /**
     * Dump both internal maps, mainly useful for debugging.
     *
     * @return array { targers|targets => position map, positions => target map }
     */
    public function getAll()
    {
        return array(
            // Misspelled key kept so existing callers keep working.
            "targers" => $this->_positionToTarget,
            "targets" => $this->_positionToTarget,
            "positions" => $this->_targetToPositions,
        );
    }

    /**
     * Find the single target responsible for a resource.
     *
     * @param string $resource
     * @return string The target closest after the resource on the ring.
     * @throws Flexihash_Exception If no targets exist.
     */
    public function lookup($resource)
    {
        $targets = $this->lookupList($resource, 1);

        if (empty($targets)) {
            throw new Flexihash_Exception('No targets exist');
        }

        return $targets[0]; // index 0 is the target nearest to the resource
    }

    /**
     * Find up to `$requestedCount` distinct targets for a resource, in ring order.
     *
     * Returns an empty array when there are no positions and the sole target
     * when only one exists. Otherwise the resource is hashed, the ring is
     * sorted, and targets are collected starting from the first position
     * greater than the resource's hash, wrapping around to the beginning of
     * the ring when the end is reached.
     *
     * @param string $resource
     * @param int $requestedCount The length of the list to return
     * @return array List of targets
     * @throws Flexihash_Exception If `$requestedCount` is falsy.
     */
    public function lookupList($resource, $requestedCount)
    {
        if (!$requestedCount) {
            throw new Flexihash_Exception('Invalid count requested');
        }

        if (empty($this->_positionToTarget)) {
            return array();
        }

        if ($this->_targetCount == 1) {
            return array_unique(array_values($this->_positionToTarget));
        }

        $resourcePosition = $this->_hasher->hash($resource);

        $results = array();
        $collect = false;

        $this->_sortPositionTargets();

        // First pass: start collecting at the first position past the resource.
        foreach ($this->_positionToTarget as $key => $value) {
            if (!$collect && $key > $resourcePosition) {
                $collect = true;
            }

            // Strict comparison: loose in_array() treats distinct numeric-string
            // targets such as "10" and "1e1" as equal and would drop one of them.
            if ($collect && !in_array($value, $results, true)) {
                $results[] = $value;
            }

            if (count($results) == $requestedCount || count($results) == $this->_targetCount) {
                return $results;
            }
        }

        // Second pass: wrap around to the start of the ring for the remainder.
        foreach ($this->_positionToTarget as $key => $value) {
            if (!in_array($value, $results, true)) {
                $results[] = $value;
            }

            if (count($results) == $requestedCount || count($results) == $this->_targetCount) {
                return $results;
            }
        }

        return $results;
    }

    /**
     * Human-readable summary: class name plus the comma-separated target list.
     *
     * @return string
     */
    public function __toString()
    {
        return sprintf(
            '%s{targets:[%s]}',
            get_class($this),
            implode(',', $this->getAllTargets())
        );
    }

    /**
     * Sort the ring by position, once, until the next target is added.
     */
    private function _sortPositionTargets()
    {
        if (!$this->_positionToTargetSorted) {
            ksort($this->_positionToTarget, SORT_REGULAR);
            $this->_positionToTargetSorted = true;
        }
    }
}

/**
 * Contract for the hash strategies Flexihash uses to place targets and
 * resources on the ring.
 */
interface Flexihash_Hasher
{
/**
 * Hash a string to a ring position.
 *
 * The return type is implementation-defined. Flexihash uses the value as an
 * array key and orders positions with `>` and ksort(), so implementations
 * should return values that compare consistently with each other.
 *
 * @param string $string Value to hash.
 * @return mixed Ring position.
 */
public function hash($string);

}

/**
 * Hasher backed by PHP's built-in CRC32 checksum.
 */
class Flexihash_Crc32Hasher implements Flexihash_Hasher
{
    /**
     * Compute the CRC32 checksum of the given string.
     *
     * @param string $string Value to hash.
     * @return int Checksum used as the ring position.
     */
    public function hash($string)
    {
        $checksum = crc32($string);

        return $checksum;
    }
}

/**
 * Hasher that derives a 32-bit ring position from the MD5 digest.
 */
class Flexihash_Md5Hasher implements Flexihash_Hasher
{
    /**
     * Hash a string to an integer built from the first 32 bits of its MD5.
     *
     * The hex prefix is converted to a number because Flexihash uses positions
     * as array keys and orders them with `>` and ksort(). Raw hex strings mix
     * integer-cast keys ("12345678"), numeric strings ("12e45678") and plain
     * strings ("0cc175b9"), which do not compare consistently.
     *
     * @param string $string Value to hash.
     * @return int|float Ring position in 0..0xFFFFFFFF. hexdec() yields a
     *                   float when the value exceeds PHP_INT_MAX, which can
     *                   only happen on 32-bit builds.
     */
    public function hash($string)
    {
        return hexdec(substr(md5($string), 0, 8)); // 8 hexits = 32bit
    }
}

/**
 * Exception thrown by Flexihash for invalid operations: adding a duplicate
 * target, removing an unknown target, requesting an invalid list length, or
 * looking up a resource when no targets exist.
 */
class Flexihash_Exception extends Exception
{
}



测试代码

// Five cache nodes to place on the ring.
$targets = array(
    "192.168.1.1:11011",
    "192.168.1.1:11012",
    "192.168.1.1:11013",
    "192.168.1.1:11014",
    "192.168.1.1:11015",
);

// Build the ring with the default hasher and replica count.
$hash = new Flexihash();
$hash->addTargets($targets);

// Map 25 sample keys and print which node owns each one.
for ($i = 0; $i < 25; $i++) {
    $resource = sprintf("format %d", $i);
    $owner = $hash->lookup($resource);
    var_dump($resource . " --> " . $owner);
}



输出结果

string(30) "format 0 --> 192.168.1.1:11015"
string(30) "format 1 --> 192.168.1.1:11015"
string(30) "format 2 --> 192.168.1.1:11015"
string(30) "format 3 --> 192.168.1.1:11015"
string(30) "format 4 --> 192.168.1.1:11011"
string(30) "format 5 --> 192.168.1.1:11011"
string(30) "format 6 --> 192.168.1.1:11011"
string(30) "format 7 --> 192.168.1.1:11011"
string(30) "format 8 --> 192.168.1.1:11012"
string(30) "format 9 --> 192.168.1.1:11013"
string(31) "format 10 --> 192.168.1.1:11013"
string(31) "format 11 --> 192.168.1.1:11011"
string(31) "format 12 --> 192.168.1.1:11012"
string(31) "format 13 --> 192.168.1.1:11011"
string(31) "format 14 --> 192.168.1.1:11014"
string(31) "format 15 --> 192.168.1.1:11014"
string(31) "format 16 --> 192.168.1.1:11014"
string(31) "format 17 --> 192.168.1.1:11014"
string(31) "format 18 --> 192.168.1.1:11012"
string(31) "format 19 --> 192.168.1.1:11012"
string(31) "format 20 --> 192.168.1.1:11013"
string(31) "format 21 --> 192.168.1.1:11012"
string(31) "format 22 --> 192.168.1.1:11012"
string(31) "format 23 --> 192.168.1.1:11014"
string(31) "format 24 --> 192.168.1.1:11012"



用户头像

Anrika

关注

还未添加个人签名 2018.08.21 加入

还未添加个人简介

评论

发布
暂无评论
架构师训练营 - 第五周 - 作业