文章

Prometheus部署-docker-compose

docker-compose部署

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
version: '3.8'

services:
    # Prometheus server. Configuration, rule file and data directory are
    # bind-mounted from /opt/prometheus on the host; port 9090 is published.
    prometheus:
        container_name: prometheus
        hostname: prometheus
        image: prom/prometheus
        restart: always
        ports:
            - '9090:9090'
        volumes:
            # Main configuration file.
            - '/opt/prometheus/config/prometheus.yml:/etc/prometheus/prometheus.yml:rw'
            # Alert rule file, placed next to prometheus.yml inside the container.
            - '/opt/prometheus/config/rule/node_down.yml:/etc/prometheus/node_down.yml:rw'
            # Host directory mounted at /prometheus for persistent data.
            # NOTE(review): the container user must be able to write to this
            # host directory - confirm ownership/permissions before starting.
            - '/opt/prometheus/data:/prometheus:rw'

#    alertmanager:
#        image: prom/alertmanager
#        container_name: alertmanager
#        hostname: alertmanager
#        restart: always
#        volumes:
#            - /usr/local/src/config/alertmanager.yml:/etc/alertmanager/alertmanager.yml
#        ports:
#            - "9093:9093"

    # Grafana dashboard server, published on port 3000.
    grafana:
        container_name: grafana
        hostname: grafana
        image: grafana/grafana
        restart: always
        volumes:
            # Keep /var/lib/grafana on the host so its contents survive
            # container re-creation.
            # NOTE(review): the container user must be able to write to this
            # host directory - confirm ownership/permissions before starting.
            - '/opt/prometheus/grafana_data:/var/lib/grafana:rw'
        ports:
            - '3000:3000'

    # node_exporter: exposes host hardware/OS metrics on port 9100.
    # The exporter is meant to observe the *host*. Run in a plain container it
    # would report on the container itself, so it is given the host PID
    # namespace and a read-only view of the host root filesystem, and is told
    # where that view is mounted via --path.rootfs.
    # NOTE(review): networking stays bridged so the published port (and the
    # existing scrape targets) keep working; network interface metrics then
    # describe the container's namespace. Use `network_mode: host` instead of
    # `ports` if host network metrics are required.
    node-exporter:
        image: quay.io/prometheus/node-exporter
        container_name: node-exporter
        hostname: node-exporter
        restart: always
        pid: host
        command:
            - '--path.rootfs=/host'
        volumes:
            # Read-only host root; rslave lets later host mounts show up too.
            - '/:/host:ro,rslave'
        ports:
            - "9100:9100"

#    cadvisor:
#        image: google/cadvisor:latest
#        container_name: cadvisor
#        hostname: cadvisor
#        restart: always
#        volumes:
#            - /:/rootfs:ro
#            - /var/run:/var/run:rw
#            - /sys:/sys:ro
#            - /var/lib/docker/:/var/lib/docker:ro
#        ports:
#            - "8080:8080"

配置文件: prometheus.yml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Defaults shared by all scrape jobs and rule evaluations.
global:
  # Evaluate rules every 15 seconds (the default is every 1 minute).
  evaluation_interval: 15s
  # Scrape targets every 15 seconds (the default is every 1 minute).
  scrape_interval: 15s

#alerting:
#  alertmanagers:
#  - static_configs:
#    - targets: ['172.17.3.7:9093']
#      # - alertmanager:9093

#rule_files:
#  - "node_down.yml"
#  # - "first_rules.yml"
#  # - "second_rules.yml"

# Scrape jobs. Every job uses a fixed, hand-maintained target list.
scrape_configs:
  # Target on port 9090 of 172.17.3.7.
  - job_name: 'prometheus'
    static_configs:
      - targets:
          - '172.17.3.7:9090'

  # Target on port 9100 of 172.17.3.7.
  - job_name: 'bank'
    static_configs:
      - targets:
          - '172.17.3.7:9100'

  # NOTE(review): same target as job "bank" - confirm that scraping this
  # endpoint under two job names is intended.
  - job_name: 'client'
    static_configs:
      - targets:
          - '172.17.3.7:9100'

  # Target on port 9100 of 172.17.3.15.
  - job_name: 'bin-k8s-node02'
    static_configs:
      - targets:
          - '172.17.3.15:9100'
 
#  - job_name: 'cadvisor'
#    static_configs:
#    - targets: ['172.17.3.7:8080']

#  - job_name: 'node'
#    scrape_interval: 8s
#    static_configs:
#      - targets: ['172.17.3.7:9100']

配置文件: node_down.yml

1
2
3
4
5
6
7
8
9
10
11
12
# Alerting rules for Prometheus.
# The top-level key of a rule file must be `groups`; the previous `igroups`
# spelling is not a recognised key.
groups:
  - name: node_down
    rules:
      # Fires once the `up` series of a target has been 0 for a full minute.
      - alert: InstanceDown
        expr: up == 0
        for: 1m
        labels:
          user: test
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."

配置文件: alertmanager.yml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Global SMTP settings used for email notifications.
global:
  # SMTP relay as "host:port". Alertmanager has no separate port setting
  # (the former `smtp_smartport` key is not a valid option), so the port is
  # part of this value.
  smtp_smarthost: 'smtp.exmail.qq.com:465'
  # Sender address used in the From header.
  smtp_from: 'fscloude@fscloude.cn'
  # SMTP login user name (the sending mailbox).
  smtp_auth_username: 'fscloude@fscloude.cn'
  # SMTP login password. Placeholder only: never publish or commit a real
  # credential; fill it in on the host or load it from a secret file.
  smtp_auth_password: '<SMTP_AUTH_PASSWORD>'
  # Do not require STARTTLS. Port 465 is an implicit-TLS port, where the
  # connection is already encrypted and STARTTLS is normally not offered.
  # NOTE(review): confirm against the mail provider before relying on this.
  smtp_require_tls: false

# Root route: every alert is delivered to the single receiver named below.
route:
  receiver: 'live-monitoring'
  # Alerts sharing the same alertname are batched into one notification.
  group_by:
    - alertname
  # Delay before the first notification of a new group.
  group_wait: 10s
  # Delay before notifying about alerts added to an existing group.
  group_interval: 10s
  # Delay before re-sending a notification that is still firing.
  repeat_interval: 10m

# Notification targets; the name must match the `receiver` used by the route.
receivers:
  - name: live-monitoring
    email_configs:
      # Mailbox that receives the alert emails.
      - to: fscloude@fscloude.cn

启动

1
2
3
4
5
6
启动之前将docker-compose.yml中的文件放在挂载的对应文件夹下,特别是配置文件:
	prometheus.yml
	node_down.yml

# 启动
docker-compose up -d 
本文由作者按照 CC BY 4.0 进行授权