关于 Elasticsearch 不同分片设置的压测报告
- 2023-07-17
本文字数:9201 字
阅读完需:约 30 分钟
摘要
为了排查当前集群经常出现索引超时和请求被拒绝的问题,我们模拟了线上集群环境及索引设置,使用压测工具随机生成测试数据,分别对当前 850 个分片的索引、分片数减半(425 个)的索引以及分片数更少的索引进行写入压测。压测中使用不同的并发数和不同的批次大小,观察索引的写入吞吐,并记录写入队列的堆积情况,以分析分片数和批次大小对写入的影响,从而确定后续的优化方案。
压测场景
Elasticsearch 版本为 v7.7.1,集群共有 57 个节点,其中包含 3 个独立 Master 节点和 3 个协调节点,JVM 内存为 31GB。
压测流程
单索引 850 分片
展开查看索引定义
PUT idx-xxxx-xxxxxx{ "aliases" : { "alias-xxxx-xxxxxx" : { } }, "mappings" : { "dynamic" : "strict", "_routing" : { "required" : true }, "_source" : { "excludes" : [ "isExtract*", "batchNo" ] }, "properties" : { "addxxxx" : { "type" : "text", "term_vector" : "with_positions_offsets" }, "clxxxx" : { "type" : "byte" }, "contxxxx" : { "type" : "text", "boost" : 4.0, "term_vector" : "with_positions_offsets" }, "conxxxx" : { "type" : "keyword", "doc_values" : false }, "con1xxxx" : { "type" : "text", "boost" : 16.0, "term_vector" : "with_positions_offsets", "fields" : { "keyword" : { "type" : "keyword", "normalizer" : "keyword_normalizer" } }, "analyzer" : "name_analyzer", "search_analyzer" : "keyword_analyzer" }, "contSxxxx" : { "type" : "long", "index" : false, "doc_values" : false }, "contSxxxxx" : { "type" : "keyword", "doc_values" : false }, "contTxxxx" : { "type" : "short" }, "crtxxxx" : { "type" : "date", "ignore_malformed" : true, "format" : "yyyyMMddHHmmss" }, "duration" : { "type" : "long", "index" : false, "doc_values" : false }, "largeTxxxx" : { "type" : "keyword", "boost" : 8.0, "index" : false, "doc_values" : false }, "md5" : { "type" : "keyword", "index" : false, "doc_values" : false }, "orderxxxx" : { "type" : "alias", "path" : "contName.keyword" }, "ownxxxxxx" : { "type" : "keyword", "doc_values" : false }, "ownxxxxxxxxxx" : { "type" : "keyword", "doc_values" : false }, "ownxxxxxxxxxxx" : { "type" : "keyword", "doc_values" : false }, "ownxxxxxxxxxxx" : { "type" : "keyword", "doc_values" : false }, "parenxxxxxxxxxx" : { "type" : "keyword" }, "pathxx" : { "type" : "text", "boost" : 8.0, "term_vector" : "with_positions_offsets", "fields" : { "keyword" : { "type" : "keyword" } }, "analyzer" : "path_analyzer" }, "presexxxxx" : { "type" : "keyword", "boost" : 8.0, "index" : false, "doc_values" : false }, "presexxxxx" : { "type" : "keyword", "boost" : 8.0, "index" : false, "doc_values" : false }, "presxxxxxx" : { "type" : "keyword", "boost" : 8.0, "index" : 
false, "doc_values" : false }, "prixxxxxx" : { "type" : "short", "index" : false }, "search_xxxxxx" : { "type" : "alias", "path" : "contName" }, "servixxxxxx" : { "type" : "byte" }, "shotxxxxxx" : { "type" : "date", "ignore_malformed" : true, "format" : "yyyyMMddHHmmss" }, "xxxxxxlThuxxxxxx" : { "type" : "keyword", "boost" : 8.0, "index" : false, "doc_values" : false }, "tagxxxxxx" : { "type" : "text", "term_vector" : "with_positions_offsets" }, "thumxxxxxx" : { "type" : "keyword", "boost" : 8.0, "index" : false, "doc_values" : false }, "xxxxxxpdxxxxxx" : { "type" : "date", "ignore_malformed" : true, "format" : "yyyyMMddHHmmss" }, "xxxxxxderAcxxxxxx" : { "type" : "keyword", "doc_values" : false }, "xxxxxxerAccouxxxxxx" : { "type" : "keyword", "doc_values" : false }, "xxxxxxerxxxxxxID" : { "type" : "keyword", "doc_values" : false }, "xxxxxxderNxxxxxx" : { "type" : "keyword", "doc_values" : false } } }, "settings" : { "index" : { "max_ngram_diff" : "50", "refresh_interval" : "1s", "number_of_shards" : "850",
"analysis" : { "normalizer" : { "keyword_normalizer" : { "filter" : [ "lowercase" ], "type" : "custom" } }, "analyzer" : { "keyword_analyzer" : { "filter" : [ "lowercase" ], "type" : "custom", "tokenizer" : "keyword" }, "name_analyzer" : { "filter" : [ "lowercase" ], "type" : "custom", "tokenizer" : "name_tokenizer" }, "path_analyzer" : { "filter" : [ "lowercase" ], "type" : "custom", "tokenizer" : "path_tokenizer" } }, "tokenizer" : { "name_tokenizer" : { "type" : "ngram", "min_gram" : "1", "max_gram" : "5" }, "path_tokenizer" : { "pattern" : "/", "type" : "pattern" } } }, "number_of_replicas" : "1"
} } }
展开查看样例数据
POST idx-owncloud-img/_doc/1?routing=1{ "ownerxxxxxx" : "002#######0oV", "serxxxxxx" : 1, "tagxxxxxx" : "", "contxxxxxx" : "", "xxxxxxAccoxxxxxxe" : "1", "presxxxxxx" : "", "conxxxxxx" : "jpg", "xxxxxxerBxxxxxx" : "6#######573", "ownerxxxxxxx" : "13#######62", "presxxxxxxL" : "", "duxxxxxx" : 0, "paxxxxxx" : "00##########################################043", "crtxxxxxx" : "20#######45", "pxxxxxxtCatxxxxxx" : "001############################043", "sxxxxxxThumxxxxxx" : "http://downl#################################################961", "uxxxxxxerAxxxxxxt" : "1##############2", "uxxxxxxderAccoxxxxxxe" : "1", "uxxxxxxderxxxxxxID" : "0#####################V", "lxxxxxxhumxxxxxxl" : "http://d###################################D961", "thxxxxxxl" : "http://do###############################################################61", "axxxxxxss" : "", "uxxxxxxm" : "20##############8", "cxxxxxx" : 3, "coxxxxxx" : 1, "prxxxxxx" : 10, "coxxxxxx" : "0###################################cm", "co2xxxxxx" : 5##############8, "shoxxxxxx" : "20##############4", "contxxxxxx" : "mm##############g", "presxxxxxx" : "", "oxxxxxxBmpxxxxxx" : "6#######3", "md5" : "7##############1E"}
展开查看 loadgen 配置
root@loadgen:/opt/loadgen# cat loadgen.ymlstatsd: enabled: false host: 192.168.3.98 port: 8125 namespace: loadgen.variables: - name: ip type: file path: dict/ip.txt - name: message type: file path: dict/nginx.log# - name: user# type: file# path: dict/user.txt - name: id type: sequence - name: uuid type: uuid - name: now_local type: now_local - name: now_utc type: now_utc - name: now_unix type: now_unix - name: suffix type: range from: 12 to: 12 - name: bool type: range from: 0 to: 1requests: - request: method: POST runtime_variables: batch_no: id runtime_body_line_variables: routing_no: uuid basic_auth: username: elastic password: #### url: https://xxx.elasticsearch.xxx.cn:9243/_bulk body_repeat_times: 50 body: "{ \"create\" : { \"_index\" : \"idx-xxxxxx-xxxxxx\",\"_type\":\"_doc\", \"_id\" : \"$[[uuid]]\" , \"routing\" : \"$[[routing_no]]\" } }\n{ \"ownerxxxxxx\" : \"0011WsjCK0oV\", \"servxxxxxx\" : $[[bool]], \"tagxxxxxx\" : \"\", \"contxxxxxx\" : \"\", \"ownexxxxxxunxxxxxx\" : \"$[[bool]]\", \"prxxxxxxentLxxxxxx\" : \"\", \"conxxxxxx\" : \"jpg\", \"uxxxxxxexxxxxxID\" : \"$[[id]]\", \"owxxxxxxccxxxxxxt\" : \"$[[routing_no]]\", \"prxxxxxxtUxxxxxxL\" : \"\", \"durxxxxxxn\" : 0, \"paxxxxxx\" : \"00019700101000000001/0011WsjCK0oV00019700101000000043\", \"crxxxxxx\" : \"$[[id]]\", \"paxxxxxxntxxxxxxogIxxxxxx\" : \"0011WsjCK0oV00019700101000000043\", \"sxxxxxxThumxxxxxx\" : \"http://xxx.xxx.cn:80/storageWeb/servlet/GetFileByURLServlet?root=/mnt/wfs133&fileid=KB1af35f100578d655b2cfbd7edd2cb50e.jpg&ct=1&type=0&code=80B0EAB7F429F1A32F76EB895F5FF4DE1853D254604FAB67A7C33FDF92BE7220&exp=315&account=MTM2MzgzMTU1NjI=&p=0&ui=0011WsjCK0oV&ci=0011WsjCK0oV06320210812125345tcm&userSiteId=usersite-s&cn=mmexport162592513503...&oprChannel=10000000&dom=D961\", \"xxxxxxderAxxxxxxnt\" : \"$[[routing_no]]\", \"upxxxxxxerAcxxxxxxtype\" : \"$[[bool]]\", \"uploaderNDUserID\" : \"$[[uuid]]\", \"largeThumbnail\" : 
\"http://xxx.xxx.cn:80/storageWeb/servlet/GetFileByURLServlet?root=/mnt/wfs133&fileid=KB1af35f100578d655b2cfbd7edd2cb50e.jpg&ct=1&type=1&code=80B0EAB7F429F1A32F76EB895F5FF4DE1853D254604FAB67A7C33FDF92BE7220&exp=315&account=MTM2MzgzMTU1NjI=&p=0&ui=0011WsjCK0oV&ci=0011WsjCK0oV06320210812125345tcm&userSiteId=usersite-s&cn=mmexport162592513503...&oprChannel=10000000&dom=D961\", \"xxxxxxil\" : \"http://download.xxx.xxx.com:80/storageWeb/servlet/GetFileByURLServlet?root=/mnt/wfs133&fileid=KB1af35f100578d655b2cfbd7edd2cb50e.jpg&ct=1&type=2&code=80B0EAB7F429F1A32F76EB895F5FF4DE1853D254604FAB67A7C33FDF92BE7220&exp=315&account=MTM2MzgzMTU1NjI=&p=0&ui=0011WsjCK0oV&ci=0011WsjCK0oV06320210812125345tcm&userSiteId=usersite-s&cn=mmexport162592513503...&oprChannel=10000000&dom=D961\", \"adxxxxxx\" : \"\", \"upxxxxxx\" : \"$[[now_unix]]\", \"cxxxxxx\" : 3, \"contxxxxxxe\" : $[[bool]], \"prixxxxxx\" : 10, \"conxxxxxx\" : \"0011WsjCK0oV06320210812125345tcm\", \"contxxxxxx\" : $[[id]], \"shoxxxxxx\" : \"$[[id]]\", \"contxxxxxxe\" : \"mmexport1625925135032.jpg\", \"prxxxxxxtHxxxxxx\" : \"\", \"oxxxxxxrBmxxxxxxID\" : \"$[[id]]\", \"md5\" : \"$[[uuid]]\" }\n"
运行测试
开启 gzip 流量压缩,执行压测:
root@loadgen:/opt/loadgen# ./loadgen-linux-amd64 -config loadgen.yml -d 6000 -c 100 -compress
1 副本 100 并发
0 副本 100 并发
0 副本 200 并发
写入队列已经存在大量堆积和拒绝的现象了:
1 副本 200 并发
1 副本 400 并发
1 副本 800 并发
1 副本批次 500 并发 100
1 副本批次 2000 并发 100
1 副本批次 5000 并发 100
1 副本批次 5000 并发 200
单索引 425 分片
展开查看索引定义
PUT idx-xxxxxx-xxxxxx-425{ "aliases" : { "alias-xxxxxx-xxxxxx" : { } }, "mappings" : { "dynamic" : "strict", "_routing" : { "required" : true }, "_source" : { "excludes" : [ "isExtract*", "batchNo" ] }, "properties" : { "addxxxxxx" : { "type" : "text", "term_vector" : "with_positions_offsets" }, "cxxxxxx" : { "type" : "byte" }, "coxxxxxxc" : { "type" : "text", "boost" : 4.0, "term_vector" : "with_positions_offsets" }, "coxxxxxx" : { "type" : "keyword", "doc_values" : false }, "conxxxxxxe" : { "type" : "text", "boost" : 16.0, "term_vector" : "with_positions_offsets", "fields" : { "keyword" : { "type" : "keyword", "normalizer" : "keyword_normalizer" } }, "analyzer" : "name_analyzer", "search_analyzer" : "keyword_analyzer" }, "coxxxxxxze" : { "type" : "long", "index" : false, "doc_values" : false }, "conxxxxxxfix" : { "type" : "keyword", "doc_values" : false }, "coxxxxxxpe" : { "type" : "short" }, "cxxxxxxm" : { "type" : "date", "ignore_malformed" : true, "format" : "yyyyMMddHHmmss" }, "duxxxxxxon" : { "type" : "long", "index" : false, "doc_values" : false }, "laxxxxxxbnail" : { "type" : "keyword", "boost" : 8.0, "index" : false, "doc_values" : false }, "md5" : { "type" : "keyword", "index" : false, "doc_values" : false }, "ordxxxxxxNamxxxxxx" : { "type" : "alias", "path" : "contName.keyword" }, "oxxxxxxccoxxxxxxt" : { "type" : "keyword", "doc_values" : false }, "owxxxxxxcounxxxxxxpe" : { "type" : "keyword", "doc_values" : false }, "owxxxxxxpUsxxxxxxD" : { "type" : "keyword", "doc_values" : false }, "oxxxxxxDUsexxxxxxD" : { "type" : "keyword", "doc_values" : false }, "pxxxxxxtalxxxxxxD" : { "type" : "keyword" }, "patxxxxxx" : { "type" : "text", "boost" : 8.0, "term_vector" : "with_positions_offsets", "fields" : { "keyword" : { "type" : "keyword" } }, "analyzer" : "path_analyzer" }, "prxxxxxxntHxxxxxx" : { "type" : "keyword", "boost" : 8.0, "index" : false, "doc_values" : false }, "prxxxxxxntLxxxxxx" : { "type" : "keyword", "boost" : 8.0, "index" : false, "doc_values" 
: false }, "prxxxxxxURxxxxxx" : { "type" : "keyword", "boost" : 8.0, "index" : false, "doc_values" : false }, "pxxxxxxity" : { "type" : "short", "index" : false }, "sxxxxxxch_nxxxxxxe" : { "type" : "alias", "path" : "contName" }, "sexxxxxxeTxxxxxxe" : { "type" : "byte" }, "sxxxxxxTm" : { "type" : "date", "ignore_malformed" : true, "format" : "yyyyMMddHHmmss" }, "smxxxxxxThuxxxxxxl" : { "type" : "keyword", "boost" : 8.0, "index" : false, "doc_values" : false }, "taxxxxxxa" : { "type" : "text", "term_vector" : "with_positions_offsets" }, "txxxxxxnaxxxxxx" : { "type" : "keyword", "boost" : 8.0, "index" : false, "doc_values" : false }, "uxxxxxxm" : { "type" : "date", "ignore_malformed" : true, "format" : "yyyyMMddHHmmss" }, "upxxxxxxdexxxxxxount" : { "type" : "keyword", "doc_values" : false }, "upxxxxxxrAcxxxxxxpe" : { "type" : "keyword", "doc_values" : false }, "upxxxxxxmpUsxxxxxx" : { "type" : "keyword", "doc_values" : false }, "uxxxxxxerNDxxxxxxD" : { "type" : "keyword", "doc_values" : false } } }, "settings" : { "index" : { "max_ngram_diff" : "50", "refresh_interval" : "1s", "number_of_shards" : "425",
"analysis" : { "normalizer" : { "keyword_normalizer" : { "filter" : [ "lowercase" ], "type" : "custom" } }, "analyzer" : { "keyword_analyzer" : { "filter" : [ "lowercase" ], "type" : "custom", "tokenizer" : "keyword" }, "name_analyzer" : { "filter" : [ "lowercase" ], "type" : "custom", "tokenizer" : "name_tokenizer" }, "path_analyzer" : { "filter" : [ "lowercase" ], "type" : "custom", "tokenizer" : "path_tokenizer" } }, "tokenizer" : { "name_tokenizer" : { "type" : "ngram", "min_gram" : "1", "max_gram" : "5" }, "path_tokenizer" : { "pattern" : "/", "type" : "pattern" } } }, "number_of_replicas" : "1"
} } }
1 副本批次 50 并发 100
1 副本批次 50 并发 200
1 副本批次 50 并发 400
1 副本批次 50 并发 800
1 副本批次 500 并发 100
1 副本批次 2000 并发 100
1 副本批次 5000 并发 100
单索引 50 分片
1 副本批次 50 并发 100
1 副本批次 500 并发 100
1 副本批次 1000 并发 100
1 副本批次 5000 并发 100
走网关单索引 425 分片
1 副本批次 50 并发 400>200
1 副本批次 500 并发 100
1 副本批次 500 并发 200
1 副本批次 500 并发 400
1 副本批次 5000 并发 100
1 副本批次 5000 并发 200
1 副本批次 5000 并发 400
走网关单索引 850 分片
1 副本批次 50 并发 400
1 副本批次 500 并发 400
1 副本批次 5000 并发 400
压测结果
走网关节点异步合并模式:
结论
分片数很多的索引(850 或 425 个分片),即使并发只有 100,也可能占满线程池并出现请求拒绝,单个批次的文档数较少时更容易出现。而同样格式的索引在 50 个分片的情况下,索引的吞吐是 425 分片的两倍、850 分片的三倍,且线程池基本上没有堆积,或者堆积能很快处理完。单次请求的文档数越多,写入的效率越高。某些场景下索引虽然做了 Routing 处理,但分片数过多的索引仍然存在严重的转发效率问题。建议按照业务维度或者当前的 Routing 维度对索引进行划分,将超大索引拆分成若干个子索引,单个索引的分片数尽量不要超过 20 个。
版权声明: 本文为 InfoQ 作者【极限实验室】的原创文章。
原文链接:【http://xie.infoq.cn/article/4df7287b1e0509feb2291aaed】。
本文遵守【CC-BY 4.0】协议,转载请保留原文出处及本版权声明。
极限实验室
简单、易用、极致、创新 2021-11-22 加入
极限实验室(INFINI Labs)致力于打造极致易用的数据探索与分析体验。










评论