prometheus使用2

一般操作

查看之前安裝的

[root@mcw03 ~]# cd /usr/local/prometheus/
[root@mcw03 prometheus]# ls
console_libraries  consoles  LICENSE  NOTICE  prometheus  prometheus.yml  promtool
[root@mcw03 prometheus]# less prometheus.yml 
[root@mcw03 prometheus]#

查看配置

[root@mcw03 prometheus]# cat prometheus.yml
# my global config
global:
  scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
    - targets: ['localhost:9090']
  - job_name: 'agent1'   
    static_configs:
    - targets: ['10.0.0.14:9100']
  - job_name: 'promserver'   
    static_configs:
    - targets: ['10.0.0.13:9100']
  - job_name: 'server_mariadb' 
    static_configs:
    - targets: ['10.0.0.13:9104']
[root@mcw03 prometheus]#

啓動：

[root@mcw03 prometheus]# 
[root@mcw03 prometheus]#  /usr/local/prometheus/prometheus --config.file="/usr/local/prometheus/prometheus.yml" &
[1] 82834
[root@mcw03 prometheus]# level=info ts=2024-01-29T15:38:28.958560959Z caller=main.go:244 msg="Starting Prometheus" version="(version=2.5.0, branch=HEAD, revision=67dc912ac8b24f94a1fc478f352d25179c94ab9b)"
level=info ts=2024-01-29T15:38:28.958619046Z caller=main.go:245 build_context="(go=go1.11.1, user=root@578ab108d0b9, date=20181106-11:40:44)"
level=info ts=2024-01-29T15:38:28.95863261Z caller=main.go:246 host_details="(Linux 3.10.0-693.el7.x86_64 #1 SMP Tue Aug 22 21:09:27 UTC 2017 x86_64 mcw03 (none))"
level=info ts=2024-01-29T15:38:28.958644576Z caller=main.go:247 fd_limits="(soft=65535, hard=65535)"
level=info ts=2024-01-29T15:38:28.958654061Z caller=main.go:248 vm_limits="(soft=unlimited, hard=unlimited)"
level=info ts=2024-01-29T15:38:28.959638098Z caller=main.go:562 msg="Starting TSDB ..."
level=info ts=2024-01-29T15:38:28.96581693Z caller=main.go:572 msg="TSDB started"
level=info ts=2024-01-29T15:38:28.966063978Z caller=main.go:632 msg="Loading configuration file" filename=/usr/local/prometheus/prometheus.yml
level=info ts=2024-01-29T15:38:28.968164139Z caller=main.go:658 msg="Completed loading of configuration file" filename=/usr/local/prometheus/prometheus.yml
level=info ts=2024-01-29T15:38:28.968197199Z caller=main.go:531 msg="Server is ready to receive web requests."
level=info ts=2024-01-29T15:38:28.969282856Z caller=web.go:399 component=web msg="Start listening for connections" address=0.0.0.0:9090

訪問地址：http://10.0.0.13:9090/，訪問的時候可以看到，默認會跳轉到http://10.0.0.13:9090/graph

驗證配置文件

[root@mcw03 prometheus]# ls
console_libraries  consoles  data  LICENSE  NOTICE  prometheus  prometheus.yml  promtool
[root@mcw03 prometheus]# ./promtool check config prometheus.yml
Checking prometheus.yml
  SUCCESS: 0 rule files found

[root@mcw03 prometheus]#

將配置前面多加個o,檢查配置失敗

[root@mcw03 prometheus]# tail -3 prometheus.yml
  - job_name: 'server_mariadb' 
    static_configs:
    o- targets: ['10.0.0.13:9104']
[root@mcw03 prometheus]# ./promtool check config prometheus.yml
Checking prometheus.yml
  FAILED: parsing YAML file prometheus.yml: yaml: unmarshal errors:
  line 38: field o- targets not found in type config.plain

[root@mcw03 prometheus]#

修改配置重載

修改配置把14的註釋掉，檢查配置，重載配置，報錯

[root@mcw03 prometheus]# vim prometheus.yml 
[root@mcw03 prometheus]# cat prometheus.yml
# my global config
global:
  scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
    - targets: ['localhost:9090']
  #- job_name: 'agent1'   
  #  static_configs:
  #  - targets: ['10.0.0.14:9100']
  - job_name: 'promserver'   
    static_configs:
    - targets: ['10.0.0.13:9100']
  - job_name: 'server_mariadb' 
    static_configs:
    - targets: ['10.0.0.13:9104']
[root@mcw03 prometheus]# ./promtool check config prometheus.yml
Checking prometheus.yml
  SUCCESS: 0 rule files found

[root@mcw03 prometheus]# curl -X POST http://localhost:9090/-/reload
Lifecycle APIs are not enabled[root@mcw03 prometheus]#

熱加載

prometheus啓動後再修改配置文件，默認需要重啓才能生效

可以通過以下方式熱加載

curl -X POST http://localhost:9090/-/reload

如果請求接口後返回 Lifecycle APIs are not enabled，那就是啓動的時候沒有開啓熱更新配置，需要在啓動的命令行增加參數： --web.enable-lifecycle

./prometheus --web.enable-lifecycle --config.file=prometheus.yml

如果已經把prometheus配置成了Linux的systemd系統服務，需要到systemd的system文件夾下修改prometheus對應的.service文件。
大概步驟如下：在.service文件的ExecStart啓動命令中加上 --web.enable-lifecycle 參數

然後執行命令

systemctl daemon-reload
systemctl restart prometheus

後面每次修改了prometheus配置文件後，可以調用接口進行配置的熱加載：

curl -X POST http://ip:9090/-/reload

參考文章：

prometheus熱加載配置文件
https://blog.csdn.net/qq_21133131/article/details/117568214

Prometheus監控學習筆記之Prometheus如何熱加載更新配置
https://www.cnblogs.com/momoyan/p/12039895.html

原文鏈接：https://blog.csdn.net/qq_39595769/article/details/119240941

@@@

先殺掉，

[root@mcw03 prometheus]# ps -ef|grep prome
root      82834   2094  0 Jan29 pts/0    00:00:01 /usr/local/prometheus/prometheus --config.file=/usr/local/prometheus/prometheus.yml
root      84432   2094  0 00:17 pts/0    00:00:00 grep --color=auto prome
[root@mcw03 prometheus]# kill 82834
[root@mcw03 prometheus]# level=warn ts=2024-01-29T16:17:55.448944181Z caller=main.go:406 msg="Received SIGTERM, exiting gracefully..."
level=info ts=2024-01-29T16:17:55.448992753Z caller=main.go:431 msg="Stopping scrape discovery manager..."
level=info ts=2024-01-29T16:17:55.448999882Z caller=main.go:445 msg="Stopping notify discovery manager..."
level=info ts=2024-01-29T16:17:55.449004831Z caller=main.go:467 msg="Stopping scrape manager..."
level=info ts=2024-01-29T16:17:55.449023164Z caller=main.go:427 msg="Scrape discovery manager stopped"
level=info ts=2024-01-29T16:17:55.449031517Z caller=main.go:441 msg="Notify discovery manager stopped"
level=info ts=2024-01-29T16:17:55.449051788Z caller=manager.go:657 component="rule manager" msg="Stopping rule manager..."
level=info ts=2024-01-29T16:17:55.449060796Z caller=manager.go:663 component="rule manager" msg="Rule manager stopped"
level=info ts=2024-01-29T16:17:55.449622055Z caller=main.go:461 msg="Scrape manager stopped"
level=info ts=2024-01-29T16:17:55.449728933Z caller=notifier.go:512 component=notifier msg="Stopping notification manager..."
level=info ts=2024-01-29T16:17:55.44974018Z caller=main.go:616 msg="Notifier manager stopped"
level=info ts=2024-01-29T16:17:55.449872966Z caller=main.go:628 msg="See you next time!"

加上上面的參數啓動，這樣支持熱加載了

[root@mcw03 prometheus]# /usr/local/prometheus/prometheus --web.enable-lifecycle --config.file="/usr/local/prometheus/prometheus.yml" &
[1] 84520
[root@mcw03 prometheus]# level=info ts=2024-01-29T16:19:57.779420663Z caller=main.go:244 msg="Starting Prometheus" version="(version=2.5.0, branch=HEAD, revision=67dc912ac8b24f94a1fc478f352d25179c94ab9b)"
level=info ts=2024-01-29T16:19:57.779482093Z caller=main.go:245 build_context="(go=go1.11.1, user=root@578ab108d0b9, date=20181106-11:40:44)"
level=info ts=2024-01-29T16:19:57.779505718Z caller=main.go:246 host_details="(Linux 3.10.0-693.el7.x86_64 #1 SMP Tue Aug 22 21:09:27 UTC 2017 x86_64 mcw03 (none))"
level=info ts=2024-01-29T16:19:57.779518271Z caller=main.go:247 fd_limits="(soft=65535, hard=65535)"
level=info ts=2024-01-29T16:19:57.77952732Z caller=main.go:248 vm_limits="(soft=unlimited, hard=unlimited)"
level=info ts=2024-01-29T16:19:57.780838853Z caller=main.go:562 msg="Starting TSDB ..."
level=info ts=2024-01-29T16:19:57.813389846Z caller=web.go:399 component=web msg="Start listening for connections" address=0.0.0.0:9090
level=info ts=2024-01-29T16:19:57.828718461Z caller=main.go:572 msg="TSDB started"
level=info ts=2024-01-29T16:19:57.828777376Z caller=main.go:632 msg="Loading configuration file" filename=/usr/local/prometheus/prometheus.yml
level=info ts=2024-01-29T16:19:57.829456749Z caller=main.go:658 msg="Completed loading of configuration file" filename=/usr/local/prometheus/prometheus.yml
level=info ts=2024-01-29T16:19:57.829470351Z caller=main.go:531 msg="Server is ready to receive web requests."

此時沒有14的，

將14的配置註釋去掉

[root@mcw03 prometheus]# vim prometheus.yml 
[root@mcw03 prometheus]# cat prometheus.yml
# my global config
global:
  scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
    - targets: ['localhost:9090']
  - job_name: 'agent1'   
    static_configs:
    - targets: ['10.0.0.14:9100']
  - job_name: 'promserver'   
    static_configs:
    - targets: ['10.0.0.13:9100']
  - job_name: 'server_mariadb' 
    static_configs:
    - targets: ['10.0.0.13:9104']
[root@mcw03 prometheus]#

執行重載

[root@mcw03 prometheus]# curl -X POST http://localhost:9090/-/reload
level=info ts=2024-01-29T16:22:22.264583475Z caller=main.go:632 msg="Loading configuration file" filename=/usr/local/prometheus/prometheus.yml
level=info ts=2024-01-29T16:22:22.264875915Z caller=main.go:658 msg="Completed loading of configuration file" filename=/usr/local/prometheus/prometheus.yml
[root@mcw03 prometheus]#

刷新頁面可以看到14已經有了

設置systemctl管理

# cat /usr/lib/systemd/system/prometheus.service
[Unit]
Description=Prometheus Node Exporter
After=network.target
 
[Service]
ExecStart=/usr/local/prometheus/prometheus --config.file=/etc/prometheus.yml --web.read-timeout=5m  --web.max-connections=10 --storage.tsdb.retention=15d --storage.tsdb.path=/prometheus/data --query.max-concurrency=20 --query.timeout=2m
User=root
[Install]
WantedBy=multi-user.target

　　啓動參數解釋

--config.file=/etc/prometheus.yml 指定配置文件
  
--web.read-timeout=5m 請求鏈接的最大等待時間，防止太多的空閒鏈接佔用資源
  
--web.max-connections=10 針對prometheus，獲取數據源的時候，建立的網絡鏈接數，做一個最大數字的限制，防止鏈接數過多造成資源過大的消耗（默認值爲512，上面的示例設置爲10）
  
--storage.tsdb.retention=15d 重要參數，設置監控數據的保存期限。prometheus 開始採集監控數據後，數據會存在內存和硬盤中；保存時間過長，硬盤和內存都喫不消；時間太短，要查歷史數據就沒了。企業中一般15天較爲合適。
  
--storage.tsdb.path=/prometheus/data 存儲數據路徑，不要隨便定義
  
--query.max-concurrency=20 用戶查詢最大併發數
  
--query.timeout=2m 慢查詢強制終止

　　注意：ExecStart中配置文件的路徑不能加雙引號，否則啓動時會報錯找不到文件或目錄

　　　　本次啓動用戶是root，生產中最好新建一個專用用戶用於啓動，並且需要設置好配置文件及數據文件的權限

　　　　數據目錄在生產中最好單獨配置數據硬盤，使用LVM硬盤格式配置

　　啓動

#啓動
systemctl start prometheus
#設置開機自啓動
systemctl enable prometheus

　　查看是否啓動

lsof -i:9090
ps -ef|grep prometheus

@@@

創建文件，並創建對應的目錄

[root@mcw03 prometheus]# cat /usr/lib/systemd/system/prometheus.service
cat: /usr/lib/systemd/system/prometheus.service: No such file or directory
[root@mcw03 prometheus]# systemctl status prometheus
Unit prometheus.service could not be found.
[root@mcw03 prometheus]# vim /usr/lib/systemd/system/prometheus.service
[root@mcw03 prometheus]# pwd
/usr/local/prometheus
[root@mcw03 prometheus]# vim /usr/lib/systemd/system/prometheus.service
[root@mcw03 prometheus]# 
[root@mcw03 prometheus]# 
[root@mcw03 prometheus]# ls
console_libraries  consoles  data  LICENSE  NOTICE  prometheus  prometheus.yml  promtool
[root@mcw03 prometheus]# 
[root@mcw03 prometheus]# ls /data/
gv0  gv1  gv2  gv3
[root@mcw03 prometheus]# ls /
bin  boot  data  dev  etc  home  hs_err_pid18517.log  lib  lib64  media  mnt  opt  proc  root  run  sbin  srv  sys  tmp  user  usr  var
[root@mcw03 prometheus]# mkdir /prometheus/data
mkdir: cannot create directory ‘/prometheus/data’: No such file or directory
[root@mcw03 prometheus]# mkdir /prometheus/data -p
[root@mcw03 prometheus]# ls
console_libraries  consoles  data  LICENSE  NOTICE  prometheus  prometheus.yml  promtool
[root@mcw03 prometheus]# cp prometheus.yml  /etc/
[root@mcw03 prometheus]# cat /usr/lib/systemd/system/prometheus.service
[Unit]
Description=Prometheus Node Exporter
After=network.target
 
[Service]
ExecStart=/usr/local/prometheus/prometheus --config.file=/etc/prometheus.yml --web.read-timeout=5m  --web.max-connections=10 --storage.tsdb.retention=15d --storage.tsdb.path=/prometheus/data --query.max-concurrency=20 --query.timeout=2m
User=root
[Install]
WantedBy=multi-user.target
[root@mcw03 prometheus]# ls data/
lock  wal
[root@mcw03 prometheus]# ls data/wal/
00000000
[root@mcw03 prometheus]# ls console
ls: cannot access console: No such file or directory
[root@mcw03 prometheus]# ls consoles/
index.html.example  node-cpu.html  node-disk.html  node.html  node-overview.html  prometheus.html  prometheus-overview.html
[root@mcw03 prometheus]# ls console_libraries/
menu.lib  prom.lib
[root@mcw03 prometheus]#

此時頁面是這樣的

停止並用systemctl啓動

[root@mcw03 prometheus]# ps -ef|grep prom
root      84520   2094  0 00:19 pts/0    00:00:00 /usr/local/prometheus/prometheus --web.enable-lifecycle --config.file=/usr/local/prometheus/prometheus.yml
root      85064   2094  0 00:32 pts/0    00:00:00 grep --color=auto prom
[root@mcw03 prometheus]# kill 84520
[root@mcw03 prometheus]# level=warn ts=2024-01-29T16:32:10.65552338Z caller=main.go:406 msg="Received SIGTERM, exiting gracefully..."
level=info ts=2024-01-29T16:32:10.655566913Z caller=main.go:431 msg="Stopping scrape discovery manager..."
level=info ts=2024-01-29T16:32:10.655574899Z caller=main.go:445 msg="Stopping notify discovery manager..."
level=info ts=2024-01-29T16:32:10.65557968Z caller=main.go:467 msg="Stopping scrape manager..."
level=info ts=2024-01-29T16:32:10.655598695Z caller=main.go:427 msg="Scrape discovery manager stopped"
level=info ts=2024-01-29T16:32:10.655606567Z caller=main.go:441 msg="Notify discovery manager stopped"
level=info ts=2024-01-29T16:32:10.655627041Z caller=manager.go:657 component="rule manager" msg="Stopping rule manager..."
level=info ts=2024-01-29T16:32:10.655635473Z caller=manager.go:663 component="rule manager" msg="Rule manager stopped"
level=info ts=2024-01-29T16:32:10.65608701Z caller=main.go:461 msg="Scrape manager stopped"
level=info ts=2024-01-29T16:32:10.656138338Z caller=notifier.go:512 component=notifier msg="Stopping notification manager..."
level=info ts=2024-01-29T16:32:10.65615002Z caller=main.go:616 msg="Notifier manager stopped"
level=info ts=2024-01-29T16:32:10.656259633Z caller=main.go:628 msg="See you next time!"

[1]+  Done                    /usr/local/prometheus/prometheus --web.enable-lifecycle --config.file="/usr/local/prometheus/prometheus.yml"
[root@mcw03 prometheus]# systemctl status prometheus
● prometheus.service - Prometheus Node Exporter
   Loaded: loaded (/usr/lib/systemd/system/prometheus.service; disabled; vendor preset: disabled)
   Active: inactive (dead)
[root@mcw03 prometheus]# systemctl start prometheus
[root@mcw03 prometheus]# systemctl status prometheus
● prometheus.service - Prometheus Node Exporter
   Loaded: loaded (/usr/lib/systemd/system/prometheus.service; disabled; vendor preset: disabled)
   Active: active (running) since Tue 2024-01-30 00:32:29 CST; 7s ago
 Main PID: 85086 (prometheus)
   CGroup: /system.slice/prometheus.service
           └─85086 /usr/local/prometheus/prometheus --config.file=/etc/prometheus.yml --web.read-timeout=5m --web.max-connections=10 --storage.tsdb.retention=15d --storage.tsdb.path=...

Jan 30 00:32:29 mcw03 prometheus[85086]: level=info ts=2024-01-29T16:32:29.116661038Z caller=main.go:245 build_context="(go=go1.11.1, user=root@578ab108d0b9, date=20181106-11:40:44)"
Jan 30 00:32:29 mcw03 prometheus[85086]: level=info ts=2024-01-29T16:32:29.116676722Z caller=main.go:246 host_details="(Linux 3.10.0-693.el7.x86_64 #1 SMP Tue Aug 22 21:0...w03 (none))"
Jan 30 00:32:29 mcw03 prometheus[85086]: level=info ts=2024-01-29T16:32:29.116690993Z caller=main.go:247 fd_limits="(soft=1024, hard=4096)"
Jan 30 00:32:29 mcw03 prometheus[85086]: level=info ts=2024-01-29T16:32:29.116701722Z caller=main.go:248 vm_limits="(soft=unlimited, hard=unlimited)"
Jan 30 00:32:29 mcw03 prometheus[85086]: level=info ts=2024-01-29T16:32:29.118003926Z caller=main.go:562 msg="Starting TSDB ..."
Jan 30 00:32:29 mcw03 prometheus[85086]: level=info ts=2024-01-29T16:32:29.122879549Z caller=main.go:572 msg="TSDB started"
Jan 30 00:32:29 mcw03 prometheus[85086]: level=info ts=2024-01-29T16:32:29.122934471Z caller=main.go:632 msg="Loading configuration file" filename=/etc/prometheus.yml
Jan 30 00:32:29 mcw03 prometheus[85086]: level=info ts=2024-01-29T16:32:29.123963083Z caller=main.go:658 msg="Completed loading of configuration file" filename=/etc/prometheus.yml
Jan 30 00:32:29 mcw03 prometheus[85086]: level=info ts=2024-01-29T16:32:29.123980522Z caller=main.go:531 msg="Server is ready to receive web requests."
Jan 30 00:32:29 mcw03 prometheus[85086]: level=info ts=2024-01-29T16:32:29.124447919Z caller=web.go:399 component=web msg="Start listening for connections" address=0.0.0.0:9090
Hint: Some lines were ellipsized, use -l to show in full.
[root@mcw03 prometheus]# ps -ef|grep prome
root      85086      1  0 00:32 ?        00:00:00 /usr/local/prometheus/prometheus --config.file=/etc/prometheus.yml --web.read-timeout=5m --web.max-connections=10 --storage.tsdb.retention=15d --storage.tsdb.path=/prometheus/data --query.max-concurrency=20 --query.timeout=2m
root      85105   2094  0 00:32 pts/0    00:00:00 grep --color=auto prome
[root@mcw03 prometheus]#

刷新頁面，沒有啥變化

將14的配置註釋掉，發現還是不能重載

[root@mcw03 prometheus]# vim /etc/prometheus.yml 
[root@mcw03 prometheus]# curl -X POST http://localhost:9090/-/reload
Lifecycle APIs are not enabled[root@mcw03 prometheus]#

加上這個參數 --web.enable-lifecycle，然後重新啓動

[root@mcw03 prometheus]# vim /usr/lib/systemd/system/prometheus.service
[root@mcw03 prometheus]# cat /usr/lib/systemd/system/prometheus.service
[Unit]
Description=Prometheus Node Exporter
After=network.target
 
[Service]
ExecStart=/usr/local/prometheus/prometheus --config.file=/etc/prometheus.yml  --web.enable-lifecycle --web.read-timeout=5m  --web.max-connections=10 --storage.tsdb.retention=15d --storage.tsdb.path=/prometheus/data --query.max-concurrency=20 --query.timeout=2m
User=root
[Install]
WantedBy=multi-user.target
[root@mcw03 prometheus]# systemctl start prometheus
Warning: prometheus.service changed on disk. Run 'systemctl daemon-reload' to reload units.
[root@mcw03 prometheus]# systemctl daemon-reload 
[root@mcw03 prometheus]# systemctl start prometheus
[root@mcw03 prometheus]#

此時14down

取消註釋掉的14機器，然後修改配置重載，正常重載

[root@mcw03 prometheus]# vim /etc/prometheus.yml 
[root@mcw03 prometheus]# grep agent1 -A 4 /etc/prometheus.yml
  - job_name: 'agent1'   
    static_configs:
    - targets: ['10.0.0.14:9100']
  - job_name: 'promserver'   
    static_configs:
[root@mcw03 prometheus]# curl -X POST http://localhost:9090/-/reload
[root@mcw03 prometheus]#

刷新一下，14up了

添加第二個node,放在同一組下面

先將客戶端程序從mcw04複製到mcw02

[root@mcw04 ~]# scp -rp /usr/local/node_exporter/ 10.0.0.12:/usr/local
The authenticity of host '10.0.0.12 (10.0.0.12)' can't be established.
ECDSA key fingerprint is SHA256:mc9PiiU0mo/DDfwqVPG5s2VIrSDe1B+9iZM7rSeC/Zg.
ECDSA key fingerprint is MD5:86:5b:8b:ee:46:2b:47:a5:fb:cf:f9:68:e3:ee:b0:2a.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added '10.0.0.12' (ECDSA) to the list of known hosts.
root@10.0.0.12's password: 
LICENSE                                                                                                                                                100%   11KB 966.1KB/s   00:00    
node_exporter                                                                                                                                          100%   16MB  43.6MB/s   00:00    
NOTICE                                                                                                                                                 100%  463   248.9KB/s   00:00    
[root@mcw04 ~]#

mcw02上啓動起來

[root@mcw02 ~]# nohup /usr/local/node_exporter/node_exporter &
[1] 25347
[root@mcw02 ~]# nohup: ignoring input and appending output to ‘nohup.out’

[root@mcw02 ~]# ps -ef|grep node_export
root      25347   1746  0 10:50 pts/0    00:00:00 /usr/local/node_exporter/node_exporter
root      25354   1746  0 10:50 pts/0    00:00:00 grep --color=auto node_export
[root@mcw02 ~]# ss -lntup|grep 25347
tcp    LISTEN     0      16384    :::9100                 :::*                   users:(("node_exporter",pid=25347,fd=3))
[root@mcw02 ~]#

添加這個節點的監控之前

添加到agent1監控組下，然後重載配置

[root@mcw03 prometheus]# vim /etc/prometheus.yml
[root@mcw03 prometheus]# cat /etc/prometheus.yml
# my global config
global:
  scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
    - targets: ['localhost:9090']
  - job_name: 'agent1'   
    static_configs:
    - targets: ['10.0.0.14:9100']
    - targets: ['10.0.0.12:9100']
  - job_name: 'promserver'   
    static_configs:
    - targets: ['10.0.0.13:9100']
  - job_name: 'server_mariadb' 
    static_configs:
    - targets: ['10.0.0.13:9104']
[root@mcw03 prometheus]# curl -X POST http://localhost:9090/-/reload
[root@mcw03 prometheus]#