Prometheus部署-docker-compose
docker-compose部署
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# Compose stack for monitoring: Prometheus, Grafana and node-exporter.
# Alertmanager and cAdvisor are kept below as commented-out optional services.
# The bind-mounted config files must already exist on the host before
# `docker-compose up`.
version: '3.8'
services:
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    hostname: prometheus
    restart: always
    volumes:
      # Main Prometheus configuration.
      - /opt/prometheus/config/prometheus.yml:/etc/prometheus/prometheus.yml:rw
      # Alerting rule file, mounted next to prometheus.yml inside the container.
      - /opt/prometheus/config/rule/node_down.yml:/etc/prometheus/node_down.yml:rw
      # Time-series data kept on the host.
      # NOTE(review): the host directory presumably has to be writable by the
      # user the image runs as - confirm.
      - /opt/prometheus/data:/prometheus:rw
    ports:
      - "9090:9090"

  # alertmanager:
  #   image: prom/alertmanager
  #   container_name: alertmanager
  #   hostname: alertmanager
  #   restart: always
  #   volumes:
  #     - /usr/local/src/config/alertmanager.yml:/etc/alertmanager/alertmanager.yml
  #   ports:
  #     - "9093:9093"

  grafana:
    image: grafana/grafana
    container_name: grafana
    hostname: grafana
    restart: always
    ports:
      - "3000:3000"
    volumes:
      # Grafana state kept on the host.
      - /opt/prometheus/grafana_data:/var/lib/grafana:rw

  node-exporter:
    image: quay.io/prometheus/node-exporter
    container_name: node-exporter
    hostname: node-exporter
    restart: always
    ports:
      - "9100:9100"

  # cadvisor:
  #   image: google/cadvisor:latest
  #   container_name: cadvisor
  #   hostname: cadvisor
  #   restart: always
  #   volumes:
  #     - /:/rootfs:ro
  #     - /var/run:/var/run:rw
  #     - /sys:/sys:ro
  #     - /var/lib/docker/:/var/lib/docker:ro
  #   ports:
  #     - "8080:8080"
配置文件: prometheus.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Prometheus server configuration: global timing plus static scrape targets.
global:
  scrape_interval: 15s  # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s  # Evaluate rules every 15 seconds. The default is every 1 minute.

# Alertmanager integration (disabled).
# alerting:
#   alertmanagers:
#     - static_configs:
#         - targets: ['172.17.3.7:9093']
#         # - alertmanager:9093

# Rule files (disabled).
# rule_files:
#   - "node_down.yml"
#   # - "first_rules.yml"
#   # - "second_rules.yml"

scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: prometheus
    static_configs:
      - targets: ['172.17.3.7:9090']

  - job_name: bank
    static_configs:
      - targets: ['172.17.3.7:9100']

  # NOTE(review): same target as job "bank" - confirm this duplication is intended.
  - job_name: client
    static_configs:
      - targets: ['172.17.3.7:9100']

  - job_name: bin-k8s-node02
    static_configs:
      - targets: ['172.17.3.15:9100']

  # - job_name: 'cadvisor'
  #   static_configs:
  #     - targets: ['172.17.3.7:8080']

  # - job_name: 'node'
  #   scrape_interval: 8s
  #   static_configs:
  #     - targets: ['172.17.3.7:9100']
配置文件: node_down.yml
1
2
3
4
5
6
7
8
9
10
11
12
# Alerting rules: fire InstanceDown when a scrape target has been unreachable
# (up == 0) for one minute.
groups:
  - name: node_down
    rules:
      - alert: InstanceDown
        expr: up == 0
        # The condition must hold for this long before the alert fires.
        for: 1m
        labels:
          user: test
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."
配置文件: alertmanager.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Alertmanager configuration: send every alert by e-mail to one receiver.
global:
  # SMTP server as host:port (Tencent Exmail). The port is part of this value;
  # Alertmanager has no separate port key.
  smtp_smarthost: 'smtp.exmail.qq.com:465'
  # Sender address.
  smtp_from: 'fscloude@fscloude.cn'
  # SMTP login user name, i.e. the sending mailbox.
  smtp_auth_username: 'fscloude@fscloude.cn'
  # SMTP login password. Placeholder: supply the real value at deploy time and
  # keep it out of published or version-controlled files.
  smtp_auth_password: 'CHANGE_ME'
  # Port 465 is implicit TLS, while this option demands STARTTLS, so it is off.
  # NOTE(review): confirm against the Alertmanager version in use.
  smtp_require_tls: false

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 10m
  receiver: live-monitoring

receivers:
  - name: 'live-monitoring'
    email_configs:
      # Recipient address.
      - to: 'fscloude@fscloude.cn'
启动
1
2
3
4
5
6
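启动之前,请先把 docker-compose.yml 中 volumes 挂载的文件放到宿主机对应的目录下(prometheus.yml 放在 /opt/prometheus/config/,node_down.yml 放在 /opt/prometheus/config/rule/),特别是以下配置文件: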
prometheus.yml
node_down.yml
# Start the stack in detached mode
docker-compose up -d
本文由作者按照
CC BY 4.0
进行授权