es 6.2.4版本
logstash跑了一陣子之后不再同步數(shù)據(jù)了,日志信息如下:
[2019-06-19T10:30:28,379][INFO ][logstash.outputs.elasticsearch] retrying failed action with response code: 403 ({"type"=>"cluster_block_exception", "reason"=>"blocked by: [FORBIDDEN/12/index read-only / allow delete (api)];"})
[2019-06-19T10:30:28,379][INFO ][logstash.outputs.elasticsearch] Retrying individual bulk actions that failed or were rejected by the previous bulk request. {:count=>125}
檢查elasticsearch日志如下:(日志沒貼全,應(yīng)該有超過flood_stage閾值的告警,因為logstash日志里已經(jīng)提示索引只讀了...可是日志被我刪掉了...)
[2019-06-19T10:30:26,746][WARN ][o.e.c.r.a.DiskThresholdMonitor] [node-1] high disk watermark [90%] exceeded on [sPv40vq_RKanFIUuBgJuUQ][node-2][/usr/elasticsearch/data/nodes/0] free: 657.4mb[8%], shards will be relocated away from this node
[2019-06-19T10:30:26,746][INFO ][o.e.c.r.a.DiskThresholdMonitor] [node-1] low disk watermark [85%] exceeded on [DJl3qtK5Twmpi-MGNuujog][node-3][/usr/elasticsearch/data/nodes/0] free: 1gb[13.4%], replicas will not be assigned to this node
[2019-06-19T10:30:56,752][WARN ][o.e.c.r.a.DiskThresholdMonitor] [node-1] high disk watermark [90%] exceeded on [sPv40vq_RKanFIUuBgJuUQ][node-2][/usr/elasticsearch/data/nodes/0] free: 657.4mb[8%], shards will be relocated away from this node
[2019-06-19T10:30:56,752][INFO ][o.e.c.r.a.DiskThresholdMonitor] [node-1] low disk watermark [85%] exceeded on [DJl3qtK5Twmpi-MGNuujog][node-3][/usr/elasticsearch/data/nodes/0] free: 1gb[13.4%], replicas will not be assigned to this node
[2019-06-19T10:30:56,752][INFO ][o.e.c.r.a.DiskThresholdMonitor] [node-1] rerouting shards: [high disk watermark exceeded on one or more nodes]
查看代碼org.elasticsearch.cluster.routing.allocation.DiskThresholdMonitor#onNewInfo
,結(jié)合官網(wǎng)文檔 Disk-based Shard Allocationedit,可以知道es會對磁盤空間進行監(jiān)控,當磁盤空間使用量達到一定的閾值就會做不同的處理直撤。
這里其實是磁盤剩余空間達到了floodstage閾值项秉,導致Elasticsearch對每個索引強制執(zhí)行只讀索引塊愧捕,所以logstash在做數(shù)據(jù)同步的時候就報錯了商叹。
部分源碼如下:
/**
 * Reacts to a fresh {@link ClusterInfo} sample: checks every node's least-available
 * disk usage against the low / high / flood-stage watermarks, logs warnings,
 * triggers a shard reroute when a node crosses (or drops back under) the
 * high/low watermarks, and marks every index hosted on a flood-stage node
 * as read-only via {@code markIndicesReadOnly}.
 */
public void onNewInfo(ClusterInfo info) {
ImmutableOpenMap<String, DiskUsage> usages = info.getNodeLeastAvailableDiskUsages();
if (usages != null) {
boolean reroute = false;
String explanation = "";
// Garbage collect nodes that have been removed from the cluster
// from the map that tracks watermark crossing
ObjectLookupContainer<String> nodes = usages.keys();
for (String node : nodeHasPassedWatermark) {
if (nodes.contains(node) == false) {
nodeHasPassedWatermark.remove(node);
}
}
ClusterState state = clusterStateSupplier.get();
Set<String> indicesToMarkReadOnly = new HashSet<>();
for (ObjectObjectCursor<String, DiskUsage> entry : usages) {
String node = entry.key;
DiskUsage usage = entry.value;
// Emit a warn/info log line if this node's usage has crossed any watermark
warnAboutDiskIfNeeded(usage);
// Flood-stage watermark exceeded: collect every index that has a shard on
// this node so it can be marked read-only below
if (usage.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdFloodStage().getBytes() ||
usage.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdFloodStage()) {
RoutingNode routingNode = state.getRoutingNodes().node(node);
if (routingNode != null) { // this might happen if we haven't got the full cluster-state yet?!
for (ShardRouting routing : routingNode) {
indicesToMarkReadOnly.add(routing.index().getName());
}
}
}
// High watermark exceeded: request a reroute (to move shards off this node)
// unless a reroute already ran within the configured reroute interval
else if (usage.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes() ||
usage.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdHigh()) {
if ((System.nanoTime() - lastRunNS) > diskThresholdSettings.getRerouteInterval().nanos()) {
lastRunNS = System.nanoTime();
reroute = true;
explanation = "high disk watermark exceeded on one or more nodes";
} else {
logger.debug("high disk watermark exceeded on {} but an automatic reroute has occurred " +
"in the last [{}], skipping reroute",
node, diskThresholdSettings.getRerouteInterval());
}
nodeHasPassedWatermark.add(node);
}
// Low watermark exceeded: only remember that this node has passed a watermark
else if (usage.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdLow().getBytes() ||
usage.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdLow()) {
nodeHasPassedWatermark.add(node);
}
// Below every watermark: if this node was previously over the high or low
// watermark, reroute so any unassigned shards can now be allocated
else {
if (nodeHasPassedWatermark.contains(node)) {
// The node has previously been over the high or
// low watermark, but is no longer, so we should
// reroute so any unassigned shards can be allocated
// if they are able to be
if ((System.nanoTime() - lastRunNS) > diskThresholdSettings.getRerouteInterval().nanos()) {
lastRunNS = System.nanoTime();
reroute = true;
explanation = "one or more nodes has gone under the high or low watermark";
nodeHasPassedWatermark.remove(node);
} else {
logger.debug("{} has gone below a disk threshold, but an automatic reroute has occurred " +
"in the last [{}], skipping reroute",
node, diskThresholdSettings.getRerouteInterval());
}
}
}
}
if (reroute) {
logger.info("rerouting shards: [{}]", explanation);
reroute();
}
// Drop indices that already carry a write block so the block is not re-submitted
indicesToMarkReadOnly.removeIf(index -> state.getBlocks().indexBlocked(ClusterBlockLevel.WRITE, index));
if (indicesToMarkReadOnly.isEmpty() == false) {
markIndicesReadOnly(indicesToMarkReadOnly);
}
}
}
/**
 * Logs a message when the given node has crossed one of the disk watermarks.
 * Absolute byte thresholds and percentage thresholds are evaluated independently;
 * within each group only the most severe crossed watermark is reported
 * (flood stage > high > low). Flood-stage and high crossings are logged at WARN,
 * low crossings at INFO.
 *
 * Fix: the flood-stage messages previously read "will marked read-only"
 * (missing "be") in both branches.
 *
 * @param usage the latest disk usage snapshot for a single node
 */
private void warnAboutDiskIfNeeded(DiskUsage usage) {
    final long freeBytes = usage.getFreeBytes();
    final double freePercent = usage.getFreeDiskAsPercentage();

    // Check absolute disk values (watermarks configured as byte sizes)
    // free bytes < cluster.routing.allocation.disk.watermark.flood_stage
    if (freeBytes < diskThresholdSettings.getFreeBytesThresholdFloodStage().getBytes()) {
        logger.warn("flood stage disk watermark [{}] exceeded on {}, all indices on this node will be marked read-only",
            diskThresholdSettings.getFreeBytesThresholdFloodStage(), usage);
    }
    // free bytes < cluster.routing.allocation.disk.watermark.high
    else if (freeBytes < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) {
        logger.warn("high disk watermark [{}] exceeded on {}, shards will be relocated away from this node",
            diskThresholdSettings.getFreeBytesThresholdHigh(), usage);
    }
    // free bytes < cluster.routing.allocation.disk.watermark.low
    else if (freeBytes < diskThresholdSettings.getFreeBytesThresholdLow().getBytes()) {
        logger.info("low disk watermark [{}] exceeded on {}, replicas will not be assigned to this node",
            diskThresholdSettings.getFreeBytesThresholdLow(), usage);
    }

    // Check percentage disk values (watermarks configured as used-percentage;
    // the settings object stores the complementary free-percentage, hence 100 - x in the log)
    // free % < 100 - flood_stage (e.g. 5 when flood_stage is 95%)
    if (freePercent < diskThresholdSettings.getFreeDiskThresholdFloodStage()) {
        logger.warn("flood stage disk watermark [{}] exceeded on {}, all indices on this node will be marked read-only",
            Strings.format1Decimals(100.0 - diskThresholdSettings.getFreeDiskThresholdFloodStage(), "%"), usage);
    }
    // free % < 100 - high (e.g. 10 when high is 90%)
    else if (freePercent < diskThresholdSettings.getFreeDiskThresholdHigh()) {
        logger.warn("high disk watermark [{}] exceeded on {}, shards will be relocated away from this node",
            Strings.format1Decimals(100.0 - diskThresholdSettings.getFreeDiskThresholdHigh(), "%"), usage);
    }
    // free % < 100 - low (e.g. 15 when low is 85%)
    else if (freePercent < diskThresholdSettings.getFreeDiskThresholdLow()) {
        logger.info("low disk watermark [{}] exceeded on {}, replicas will not be assigned to this node",
            Strings.format1Decimals(100.0 - diskThresholdSettings.getFreeDiskThresholdLow(), "%"), usage);
    }
}
這里涉及幾個配置:
-
cluster.routing.allocation.disk.threshold_enabled
是否開啟基于磁盤的分片分配,默認true
-
cluster.routing.allocation.disk.watermark.low
控制磁盤空間使用的低水位線,默認85%。
es不會再將分片分配給磁盤使用超過這個配置的節點。
這個設置不會影響新創建的索引的主分片,或者是之前從未分配過的任何分片。
-
cluster.routing.allocation.disk.watermark.high
控制磁盤空間使用的高水位線,默認90%。
es會將磁盤使用超過這個配置的節點中的分片重新進行分配。
這個設置將影響所有分片的分配,不管分片之前是否已經被分配過。
-
cluster.routing.allocation.disk.watermark.flood_stage
控制磁盤空間使用的洪水水位線,默認95%。
es會將磁盤使用超過這個配置的節點中的所有索引都標記為只讀。
這是防止節點耗盡磁盤空間的最後手段。一旦有足夠的磁盤空間允許繼續索引操作,需要手動釋放只讀索引塊。
-
cluster.routing.allocation.disk.include_relocations
當計算一個節(jié)點的剩余磁盤空間時,是否考慮正在重新分配到當前節(jié)點的分片容量悴品,默認true
禀综。
這可能導致錯誤的高估一個磁盤的使用率。因為分片重分配可能已經(jīng)完成了90%苔严,檢索到的磁盤使用率包含了這個重新分配的分片總大小以及這已經(jīng)分配了的90%進度的大小定枷。
-
cluster.routing.allocation.disk.reroute_interval
分片重分配間隔,默認60秒
-
cluster.info.update.interval
磁盤使用率檢查間隔,默認30秒
關(guān)于配置的幾點說明:
- 上面幾個配置要么都設置為百分比,要么都設置為具體的字節值,不能混用。
- 可以通過在配置文件
elasticsearch.yml
中配置,也可以通過 cluster-update-settings API 在實時群集上動態更新。直接參考官網文檔即可。
測試
-- 添加文檔衣吠,自動創(chuàng)建索引
curl http://172.16.22.51:9200/idx_luoluocaihong/_doc/1 -X PUT -H 'Content-Type:application/json' -d '{"user":"luoluocaihong","age":"20"}'
{"_index":"idx_luoluocaihong","_type":"_doc","_id":"1","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":0,"_primary_term":1}
-- 設(shè)置只讀索引塊
curl http://172.16.22.51:9200/idx_luoluocaihong/_settings -X PUT -H 'Content-Type:application/json' -d '{"index.blocks.read_only_allow_delete": true}'
{"acknowledged":true}
-- 查看索引的設(shè)置
curl http://172.16.22.51:9200/idx_luoluocaihong/_settings
{"idx_luoluocaihong":{"settings":{"index":{"number_of_shards":"5","blocks":{"read_only_allow_delete":"true"},"provided_name":"idx_luoluocaihong","creation_date":"1561107195032","number_of_replicas":"1","uuid":"3iS68s1nQMudxhyL-zNnRg","version":{"created":"6020499"}}}}}
-- 添加文檔
curl http://172.16.22.51:9200/idx_luoluocaihong/_doc/2 -X PUT -H 'Content-Type:application/json' -d '{"user":"user_2","age":"20"}'
{"error":{"root_cause":[{"type":"cluster_block_exception","reason":"blocked by: [FORBIDDEN/12/index read-only / allow delete (api)];"}],"type":"cluster_block_exception","reason":"blocked by: [FORBIDDEN/12/index read-only / allow delete (api)];"},"status":403}
-- 重置只讀索引塊
curl http://172.16.22.51:9200/idx_luoluocaihong/_settings -X PUT -H 'Content-Type:application/json' -d '{"index.blocks.read_only_allow_delete": null}'
{"acknowledged":true}
-- 查看索引的設(shè)置
curl http://172.16.22.51:9200/idx_luoluocaihong/_settings
{"idx_luoluocaihong":{"settings":{"index":{"creation_date":"1561107195032","number_of_shards":"5","number_of_replicas":"1","uuid":"3iS68s1nQMudxhyL-zNnRg","version":{"created":"6020499"},"provided_name":"idx_luoluocaihong"}}}}
-- 添加文檔
curl http://172.16.22.51:9200/idx_luoluocaihong/_doc/2 -X PUT -H 'Content-Type:application/json' -d '{"user":"user_2","age":"20"}'
{"_index":"idx_luoluocaihong","_type":"_doc","_id":"2","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":0,"_primary_term":1}
-- 搜索
curl http://172.16.22.51:9200/idx_luoluocaihong/_search?q=age:20
{"took":19,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":2,"max_score":0.2876821,"hits":[{"_index":"idx_luoluocaihong","_type":"_doc","_id":"2","_score":0.2876821,"_source":{"user":"user_2","age":"20"}},{"_index":"idx_luoluocaihong","_type":"_doc","_id":"1","_score":0.2876821,"_source":{"user":"luoluocaihong","age":"20"}}]}}
-- 查看集群設(shè)置
curl 172.16.22.51:9200/_cluster/settings
-- 修改集群設(shè)置
curl 172.16.22.51:9200/_cluster/settings -X PUT -H 'Content-Type:application/json' -d '{"transient":{"cluster.routing.allocation.disk.watermark.low":"80%","cluster.routing.allocation.disk.watermark.high":"85%","cluster.routing.allocation.disk.watermark.flood_stage":"90%"}}'
{"acknowledged":true,"persistent":{},"transient":{"cluster":{"routing":{"allocation":{"disk":{"watermark":{"low":"80%","flood_stage":"90%","high":"85%"}}}}}}}
-- 檢查索引狀況
curl http://172.16.22.51:9200/_cat/indices
green open idx_luoluocaihong 3iS68s1nQMudxhyL-zNnRg 5 1 2 0 17.4kb 8.7kb
-- 檢查es集群健康狀況
curl 172.16.22.51:9200/_cluster/health?pretty
{
"cluster_name" : "iot-es",
"status" : "green",
"timed_out" : false,
"number_of_nodes" : 3,
"number_of_data_nodes" : 3,
"active_primary_shards" : 15,
"active_shards" : 30,
"relocating_shards" : 0,
"initializing_shards" : 0,
"unassigned_shards" : 0,
"delayed_unassigned_shards" : 0,
"number_of_pending_tasks" : 0,
"number_of_in_flight_fetch" : 0,
"task_max_waiting_in_queue_millis" : 0,
"active_shards_percent_as_number" : 100.0
}
-- 刪除索引
curl http://172.16.22.51:9200/idx_luoluocaihong -X DELETE