[root@rockylinux8 prometheus]# cat alertmanager.yml
# Alertmanager configuration: every alert goes to one webhook receiver.
# Judging by the URL, the webhook is a PrometheusAlert endpoint that forwards
# the notification to a Feishu (Lark) bot.
global:
  # Time after which an alert is declared resolved if it stops being updated.
  resolve_timeout: 5m

route:
  group_by: ['alertname']
  # Wait before sending the first notification for a new group.
  group_wait: 30s
  # Wait before notifying about new alerts added to an existing group.
  group_interval: 5m
  # Wait before re-sending a notification that is still firing.
  repeat_interval: 3h
  receiver: 'feishu'

receivers:
  - name: 'feishu'
    webhook_configs:
      # Replace the `fsurl` query parameter with your own Feishu bot webhook
      # URL. The hook id is a credential: do not publish a real one.
      - url: 'http://192.168.71.128:8088/prometheusalert?type=fs&tpl=prometheus-fs&fsurl=https://open.feishu.cn/open-apis/bot/v2/hook/14696091-e4cb-4902-8888-bd3ba0796723'

# Optional: tune the grouping and interval settings under `route` to adjust
# how often and in what form alert notifications are sent.
[root@rockylinux8 prometheus]# cat alert.rules
# Prometheus alerting rules: host memory pressure and a liveness check for
# the helloworld process group.
groups:
  - name: example-alerts
    rules:
      # Percentage of memory in use, from node_exporter's MemTotal and
      # MemAvailable metrics.
      - alert: HighMemoryUsage
        expr: 100 * (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes > 80
        for: 5m  # fire only after the threshold has been exceeded for 5 minutes
        labels:
          severity: critical
        annotations:
          summary: "Host memory usage is above 80%"
          description: "Memory usage on {{ $labels.instance }} is above 80%."

      # The `groupname` value must equal the group name produced by the
      # process-exporter config (there: the rendered `{{.Matches}}` template).
      # NOTE(review): this only fires while the series exists with value 0;
      # confirm the exporter keeps reporting the group after the process dies.
      - alert: ProcessHelloDown
        expr: (namedprocess_namegroup_num_procs{groupname="map[:helloworld.jar]"}) == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "{{ $labels.instance }}: Process helloworld-exec Down"
          description: "{{ $labels.instance }}: Process helloworld has been down for more than 1m"
          # Current value of the alert expression, exposed to the template.
          value: "{{ $value }}"
[root@rockylinux8 prometheus]# cat process.yml
# process-exporter configuration: put every process whose command line
# matches `helloworld.jar` into one named group.
process_names:
  # The group name is the rendered template. With this pattern it shows up as
  # the `groupname` label value `map[:helloworld.jar]` used by the alert rule,
  # so changing the template means updating that rule as well.
  - name: "{{.Matches}}"
    cmdline:
      # NOTE(review): cmdline entries are regular expressions, so the `.`
      # matches any character; escape it if an exact match is required.
      - 'helloworld.jar'
[root@rockylinux8 prometheus]# cat prometheus.yml
# Prometheus server configuration: three static scrape jobs, one
# Alertmanager target and one rule file.
global:
  scrape_interval: 15s

scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['192.168.71.128:9090']

  # node_exporter (host metrics).
  - job_name: 'node'
    static_configs:
      - targets: ['192.168.71.128:9100']

  # process-exporter (per-process-group metrics).
  - job_name: 'process_exporter'
    static_configs:
      - targets: ['192.168.71.128:9256']

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - '192.168.71.128:9093'  # address of the Alertmanager container

rule_files:
  # Relative paths are resolved against this config file's directory.
  - "alert.rules"  # alerting rules file
[root@rockylinux8 prometheus]# cat docker-compose.yml
# Docker Compose stack: Prometheus, Grafana, node-exporter, process-exporter
# and Alertmanager, all attached to one bridge network.
version: '3'

services:
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    ports:
      - "9090:9090"
    volumes:
      # Main config plus the rule file it references by relative path.
      - /root/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      - /root/prometheus/alert.rules:/etc/prometheus/alert.rules
    networks:
      - monitoring

  grafana:
    image: grafana/grafana
    container_name: grafana
    ports:
      - "3000:3000"
    environment:
      # NOTE(review): default-strength admin password; change it outside a lab.
      GF_SECURITY_ADMIN_PASSWORD: admin
    networks:
      - monitoring

  node-exporter:
    image: prom/node-exporter
    container_name: node-exporter
    ports:
      - "9100:9100"
    networks:
      - monitoring

  process-exporter:
    image: ncabatoff/process-exporter:0.7.10
    container_name: process-exporter
    ports:
      - "9256:9256"
    volumes:
      # Host /proc is mounted so the exporter can see host processes; the
      # `--procfs` flag below points at the same mount.
      - /proc:/host/proc
      - /root/prometheus/process.yml:/config/process.yml
    command: --procfs /host/proc --config.path /config/process.yml
    networks:
      - monitoring

  alertmanager:
    image: prom/alertmanager
    container_name: alertmanager
    ports:
      - "9093:9093"
    volumes:
      # Mount the Alertmanager config file.
      - /root/prometheus/alertmanager.yml:/etc/alertmanager/alertmanager.yml
    networks:
      - monitoring

networks:
  monitoring:
    driver: bridge
最后編輯于 :
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者