目錄
一、系統要求
- 操作系統:Linux
- MySQL ≥ 5.6
# cp /data/mysql-5.6.16/bin/mysql /usr/bin/
# cp /data/mysql-5.6.16/bin/mysqladmin /usr/bin/
二、邏輯原理
通過 Base_Mysql_plugin/60_port_collector.sh 獲取當前機器所有 MySQL 實例端口號,進而獲取 MySQL 相關指標信息,push 到 open-falcon 中。
將腳本 30_monitordata_collector.sh 放置到 falcon-agent 的 plugin/Base_Mysql_plugin 目錄,在 portal 上將 plugin 目錄綁定到相應的 host group,falcon-agent 通過自身的調度器執行該腳本,由 falcon-agent 解析腳本的標準輸出,將得到的監控項推送到 falcon-judge 進行報警閾值判斷。
三、彙報字段
module | prefix | metric | attribute | tag | type | note |
---|---|---|---|---|---|---|
Test_slave_status | mysqld_ | slavestatus | compute | port=[mysql_port] | GAUGE | 主從同步狀態,正常值爲 2 |
Test_app_alive | alive | compute | GAUGE | 數據庫存活狀態,正常值爲 1 | ||
Test_max_connection | Threads_connected | undefined | GAUGE | 數據庫連接數過多拋錯時停止本次數據採集腳本(直接 exit) | ||
global_status | Aborted_clients | compute | COUNTER | 由於客戶端沒有正確關閉連接導致客戶端終止而中斷的連接數 | ||
Aborted_connects | compute | COUNTER | 試圖連接到MySQL服務器而失敗的連接數 | |||
Bytes_received | compute | COUNTER | 從所有客戶端接收到的字節數 | |||
Bytes_sent | compute | COUNTER | 發送給所有客戶端的字節數 | |||
Com_lock_tables | compute | COUNTER | Com_xxx 語句計數變量表示每個xxx 語句執行的次數。每類語句有一個狀態變量。例如,Com_select 和 Com_insert 分別統計 SELECT(QPS) 和 INSERT 語句執行的次數 | |||
Com_rollback | compute | COUNTER | ||||
Com_delete | compute | COUNTER | ||||
Com_insert | compute | COUNTER | ||||
Com_insert_select | compute | COUNTER | ||||
Com_load | compute | COUNTER | ||||
Com_replace | compute | COUNTER | ||||
Com_select | compute | COUNTER | ||||
Com_update | compute | COUNTER | ||||
Qcache_hits | compute | COUNTER | 查詢緩存被訪問的次數 | |||
Slow_queries | compute | COUNTER | 查詢時間超過 long_query_time 秒的查詢的個數 | |||
Threads_connected | undefined | GAUGE | 當前打開的連接的數量 | |||
Threads_running | undefined | GAUGE | 激活的(非睡眠狀態)線程數 | |||
Uptime | undefined | GAUGE | 服務器已經運行的時間(以秒爲單位),判斷數據庫是否重啓 | |||
slave_status | second_behind_master | undefined | GAUGE | 這個值是時間戳的差值。是 slave當前的時間戳和 master 記錄該事件時的時間戳的差值 | ||
global_variables | auto_increment_increment | undefined | GAUGE | 增量 | ||
auto_increment_offset | undefined | GAUGE | 起始值/偏移量 | |||
autocommit | undefined | GAUGE | 自動提交機制 | |||
binlog_format | undefined | GAUGE | 二進制日誌格式 , 建議設置爲 row | |||
general_log | undefined | GAUGE | 查詢日誌開關 | |||
gtid_mode | undefined | GAUGE | MySQL 通過全局變量 gtid_mode控制開啓/關閉 GTID 模式 | |||
query_cache_size | undefined | GAUGE | 查詢緩存大小 | |||
query_cache_type | undefined | GAUGE | 緩存類型,決定緩存什麼樣的查詢 | |||
read_only | undefined | GAUGE | 當變量對複製從服務器設置爲ON時,從服務器不允許更新,除非通過從服務器的線程或用戶擁有SUPER權限。 可以確保從服務器不接受客戶端的更新命令 | |||
report_host | undefined | GAUGE | Report 系列是設置在從庫上的,包含四個參數 report-[host\|port\|user\|password]. 當 my.cnf 中設置了 report-host 時,在從庫執行 start slave 的時候,會將 report-host 和 report-port(默認 3306)發給主庫,主庫記錄在全局哈希結構變量 slave_list 中 | |||
report_port | undefined | GAUGE | ||||
server_id | undefined | GAUGE | 服務器 ID | |||
server_uuid | undefined | GAUGE | 服務器 UUID | |||
skip_name_resolve | undefined | GAUGE | 跳過 DNS 反向解析 | |||
slave_skip_errors | undefined | GAUGE | 跳過的錯誤號 | |||
slow_query_log | undefined | GAUGE | 開啓慢查詢日誌 | |||
sql_mode | undefined | GAUGE | 當前的服務器 SQL 模式,可以動態設置 | |||
time_zone | undefined | GAUGE | 當前的時區 | |||
tx_isolation | undefined | GAUGE | MySQL 默認的隔離級別 | |||
version | undefined | GAUGE | MySQL 版本 | |||
max_connections | undefined | GAUGE | 數據庫最大連接數 |
一般以上標紅指標可在 Open-Falcon 設置 Screen 以進一步做觀察分析。
四、監控告警設置
說明: 請根據實際部署情況以及使用方式,自行調整監控項觸發條件,以下報警條件只是基礎監控,詳細報警條件請自行調整。
監控項 | 告警觸發條件 | 備註 |
---|---|---|
net.port.listen/port=[mysql_port] | all(#3)==0 | 數據庫端口 down |
mysqld_alive/port=[mysql_port] | all(#2)==0 | 數據庫存活狀態異常 |
mysqld_Seconds_Behind_Master/port=[mysql_port] | all(#5)>=120 | MySQL主從延遲超過2分鐘 |
mysqld_Uptime/port=[mysql_port] | diff(#2)<=0 | 數據庫實例重啓 |
mysqld_Threads_connected/port=[mysql_port] | all(#3)>=6000 | 數據庫連接數過多 |
mysqld_max_connections/port=[mysql_port] | all(#2)<=5000 | 數據庫最大連接數設置過低 |
五、MySQL 監控指標採集腳本
1、60_port_collector.sh
功能:獲取當前機器所有實例端口號,產出數據作爲 30_monitordata_collector.sh 腳本的輸入數據。
#!/bin/bash
# Settings for the port collector.
# `service` is the process name looked for in the socket list.
service="mysqld"
# `dirname` becomes the last path component of the directory that holds this
# script; awk prints it only when that component is non-empty.
dirname=$(
  cd $(dirname $0)
  pwd | awk -F/ '$0=$NF'
)
# Per-plugin state directory where the discovered ports are written.
path="/home/falcon2/falcon_monitor/$dirname"
#######################################
# Write "<port> <service>" lines for every listening socket owned by $service.
# Globals:   path, service (read); port_field (written)
# Outputs:   overwrites $path/${service}_portstatus
#######################################
Get_portstatus(){
  local release_major

  mkdir -p $path

  # The column of `ss` output holding the local address is derived from the
  # third '.'/space separated token on the first line of /etc/issue, minus one.
  release_major=$(cat /etc/issue | awk -F'[ .]' 'NR==1&&$0=$3')
  port_field=$(($release_major-1))

  # Keep rows whose last column mentions the service and pull the trailing
  # ":<digits>" out of the address column. The three-argument match() used
  # here is a gawk extension.
  ss -tunlp \
    | awk '$NF~/'$service'/{match($'$port_field',/:([0-9]+)$/,a);print a[1]" '$service'"}' \
    > $path/${service}_portstatus
}
# Script entry point: refresh the port list file on every run.
Get_portstatus
2、30_monitordata_collector.sh
#!/bin/bash
# Settings for the MySQL metric collector.
service=mysqld
# Reporting interval (seconds) sent as "step" with every pushed metric.
#step=$(echo $0|grep -Po '\d+(?=_)')
step=60
# Last path component of the directory holding this script.
dirname=$(cd $(dirname $0);pwd|awk -F\/ '$0=$NF')
# State directory; the port list produced by the port collector is read here.
path="/home/falcon2/falcon_monitor/$dirname"
# Per-port state file prefixes (the port number is appended by the reader).
mondata_file="$path/falcon_${service}_monitor_"
tmp_mondata_file="$path/tmp_falcon_${service}_monitor_"
# Directory containing the check_tcp probe used by Test_port_status.
binpath="/home/falcon2/agent/nagios/libexec"
# Value reported for Threads_connected when the server refuses the probe
# with "Too many connections".
mysqld_max_con=13684
# Monitoring account used for every client call.
# NOTE(review): credentials are hard-coded; keep this file readable only by
# the account that runs the agent.
user="wufeimonitor"
pass="wufei@show1024"
host="127.0.0.1"

# Names of the arrays below; Curl_falcon walks them in this order.
metric_arrays=(metric_global_status metric_slave_status metric_global_variables)

# Entries are "<name>:<attribute>". An entry containing ":compute" is pushed
# as COUNTER, anything else as GAUGE. <name> must match the server's field
# name exactly, because it is looked up as the shell variable mysqld_<name>.
metric_global_status=(
  Aborted_clients:compute
  Aborted_connects:compute
  Bytes_received:compute
  Bytes_sent:compute
  Com_lock_tables:compute
  Com_rollback:compute
  Com_delete:compute
  Com_insert:compute
  Com_insert_select:compute
  Com_load:compute
  Com_replace:compute
  Com_select:compute
  Com_update:compute
  Qcache_hits:compute
  Slow_queries:compute
  Threads_connected:undefined
  Threads_running:undefined
  Uptime:undefined
)

# Fixed: "show slave status" names the lag field Seconds_Behind_Master. The
# previous spelling (second_behind_master) never matched the variable created
# from the query output, so the lag metric was always empty.
metric_slave_status=(Seconds_Behind_Master:undefined)

metric_global_variables=(
  auto_increment_increment:undefined
  auto_increment_offset:undefined
  autocommit:undefined
  binlog_format:undefined
  general_log:undefined
  gtid_mode:undefined
  query_cache_size:undefined
  query_cache_type:undefined
  read_only:undefined
  report_host:undefined
  report_port:undefined
  server_id:undefined
  server_uuid:undefined
  skip_name_resolve:undefined
  slave_skip_errors:undefined
  slow_query_log:undefined
  sql_mode:undefined
  time_zone:undefined
  tx_isolation:undefined
  version:undefined
  max_connections:undefined
)
#######################################
# Run one SHOW query and store each returned item in a shell variable named
# mysqld_<item>. Only the first whitespace-delimited token of a value is kept.
# Globals:   user, pass, host, port (read); mysqld_* (written)
# Arguments: $1 - global_status | slave_status | global_variables
# Returns:   0 always; an unknown $1 is ignored
#######################################
Get_current_value(){
  local flag=$1
  local sql name value
  local -a awk_args

  case "$flag" in
    global_status)
      sql="show global status"
      awk_args=('{print $1, $2}')
      ;;
    slave_status)
      # \G output is "   Field: value"; the first line is the row banner.
      sql="show slave status\G"
      awk_args=(-F'[: ]+' 'NR>1{print $2, $3}')
      ;;
    global_variables)
      sql="show global variables"
      awk_args=('{print $1, $2}')
      ;;
    *)
      return 0
      ;;
  esac

  # Values are assigned with printf -v rather than eval, so text coming back
  # from the server ($(), backticks, quotes) is stored literally and cannot
  # run as shell code or abort the whole batch of assignments.
  # Process substitution keeps the loop in the current shell, so the
  # variables survive the function call.
  while read -r name value; do
    # Skip anything that cannot be part of a variable name (blank lines,
    # extra row banners).
    [[ "$name" =~ ^[A-Za-z0-9_]+$ ]] || continue
    printf -v "mysqld_${name}" '%s' "$value"
  done < <(mysql -u"$user" -p"$pass" -h"$host" -P"$port" -e "$sql" 2>/dev/null \
             | awk "${awk_args[@]}")
  return 0
}
#######################################
# Load the per-port state file, whose lines are "<name>|<value>", and store
# each value in a shell variable named <name>_last.
# Globals:   mondata_file, port (read); <name>_last (written)
# Returns:   0 always; a missing file is reported on stderr
#######################################
Get_last_value(){
  local state_file="$mondata_file$port"
  local name value _rest

  if [[ ! -r "$state_file" ]]; then
    printf 'Get_last_value: cannot read %s\n' "$state_file" >&2
    return 0
  fi

  # Assign with printf -v instead of eval so file contents are never
  # executed. The "|| [[ -n ... ]]" keeps a final line without a newline.
  while IFS='|' read -r name value _rest || [[ -n "$name" ]]; do
    [[ "$name" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue
    printf -v "${name}_last" '%s' "$value"
  done < "$state_file"
  return 0
}
#######################################
# Push every configured metric to the local agent, one background job each.
# For each entry "<name>:<attribute>" of each array named in metric_arrays,
# the value is read from the shell variable mysqld_<name>.
# Globals:   metric_arrays and the arrays it names, mysqld_*, HOSTNAME,
#            step, port (read)
# Outputs:   one "<metric> <value> <countertype>" line per entry on stdout
# Returns:   0, after all pushes started here have finished
#######################################
Curl_falcon(){
  local metric_array members pre_metric metric value countertype
  local json_value payload
  local number_re='^-?(0|[1-9][0-9]*)(\.[0-9]+)?$'
  local -a group_pids=() push_pids=()

  for metric_array in "${metric_arrays[@]}"; do
    # Indirect reference to the whole array, replacing the earlier eval.
    members="${metric_array}[@]"
    {
      push_pids=()
      for pre_metric in "${!members}"; do
        {
          if [[ "$pre_metric" == *:compute* ]]; then
            countertype="COUNTER"
          else
            countertype="GAUGE"
          fi
          metric="mysqld_${pre_metric%%:*}"
          value=${!metric}
          printf '%s\n' "$metric${value:+ $value} $countertype"

          # An empty value would yield '"value":,', which is not valid JSON,
          # so there is nothing useful to send.
          if [[ -n "$value" ]]; then
            if [[ "$value" =~ $number_re ]]; then
              json_value=$value
            else
              # Non-numeric values (e.g. ON, ROW) go out as JSON strings so
              # the payload stays well-formed; whether the agent stores them
              # is up to the agent.
              json_value=${value//\\/\\\\}
              json_value=${json_value//\"/\\\"}
              json_value="\"${json_value}\""
            fi
            printf -v payload \
              '[{"metric":"%s","endpoint":"%s","timestamp":%s,"step":%s,"value":%s,"counterType":"%s","tags":"port=%s"}]' \
              "$metric" "$HOSTNAME" "$(date +%s)" "$step" "$json_value" \
              "$countertype" "$port"
            curl -s -X POST -d "$payload" http://127.0.0.1:1988/v1/push &>/dev/null
          fi
        } &
        push_pids+=("$!")
      done
      # Each group waits for its own pushes ...
      if (( ${#push_pids[@]} > 0 )); then
        wait "${push_pids[@]}"
      fi
    } &
    group_pids+=("$!")
  done

  # ... and the function waits for the groups, so the caller's own `wait`
  # really covers the pushes instead of returning while they still run.
  if (( ${#group_pids[@]} > 0 )); then
    wait "${group_pids[@]}"
  fi
  return 0
}
#######################################
# Probe the server; if it answers "Too many connections", report the
# configured ceiling as Threads_connected and stop this collection run.
# Globals:   user, pass, host, port, step, mysqld_max_con, HOSTNAME (read)
#            MYSQL_BIN (optional) - client to run, default /usr/bin/mysql
# Returns:   0 when the server accepted the probe; does not return (exits
#            the current shell with status 0) when it is saturated
#######################################
Test_max_connection(){
  local mysql_bin=${MYSQL_BIN:-/usr/bin/mysql}
  local payload

  if "$mysql_bin" -u"$user" -p"$pass" -h"$host" -P"$port" -e 'quit' 2>&1 \
      | grep -qi 'Too many connections'; then
    printf -v payload \
      '[{"metric":"mysqld_Threads_connected","endpoint":"%s","timestamp":%s,"step":%s,"value":%s,"counterType":"GAUGE","tags":"port=%s"}]' \
      "$HOSTNAME" "$(date +%s)" "$step" "$mysqld_max_con" "$port"
    curl -s -X POST -d "$payload" http://127.0.0.1:1988/v1/push &>/dev/null
    # Stop even if the push itself failed: every later query would hit the
    # same connection limit. Previously the exit was skipped on push failure.
    exit 0
  fi
  return 0
}
#######################################
# Ping the server with mysqladmin and push the number of "mysqld is alive"
# lines in its reply as the mysqld_alive gauge.
# Globals:   user, pass, host, port, step, HOSTNAME (read)
#            app_alive_status (written)
#######################################
Test_app_alive(){
  app_alive_status=$(
    /usr/bin/mysqladmin -u$user -p$pass -h$host -P$port ping 2>/dev/null \
      | grep -ci "mysqld is alive"
  )
  curl -s -X POST \
    -d '[{"metric":"mysqld_alive","endpoint":"'$HOSTNAME'","timestamp":'$(date +%s)',"step":'$step',"value":'$app_alive_status',"counterType":"GAUGE","tags":"port='$port'"}]' \
    http://127.0.0.1:1988/v1/push &>/dev/null
}
#######################################
# Run the check_tcp probe against the port and push the result as the
# mysqld_port gauge: 0 when the second ' '/':' separated token of the probe
# output is "OK", otherwise 1.
# Globals:   binpath, port, step, HOSTNAME (read); port_status (written)
#######################################
Test_port_status(){
  port_status=$(
    $binpath/check_tcp -H 127.0.0.1 -p $port \
      | awk -F'[ :]' '{print $2=="OK"?0:1}'
  )
  curl -s -X POST \
    -d '[{"metric":"mysqld_port","endpoint":"'$HOSTNAME'","timestamp":'$(date +%s)',"step":'$step',"value":'$port_status',"counterType":"GAUGE","tags":"port='$port'"}]' \
    http://127.0.0.1:1988/v1/push &>/dev/null
}
#######################################
# Push the replication health gauge mysqld_slavestatus.
# When the "show slave status" output mentions Master_Host exactly once, the
# value is the number of Slave_IO_Running / Slave_SQL_Running lines that
# contain "yes" (2 when both threads run). In every other case the value
# is 2.
# Globals:   user, pass, host, port, step, HOSTNAME (read)
#            MYSQL_BIN (optional) - client to run, default /usr/bin/mysql
#            slave_flag, slave_status (written)
#######################################
Test_slave_status(){
  local mysql_bin=${MYSQL_BIN:-/usr/bin/mysql}
  local slave_output payload

  # Query once and reuse the text: the two checks then see the same snapshot
  # and the server is hit once instead of twice per run.
  slave_output=$("$mysql_bin" -u"$user" -p"$pass" -h"$host" -P"$port" \
    -e "show slave status\G" 2>/dev/null)

  slave_flag=$(grep -ci 'Master_Host' <<<"$slave_output")
  if [[ "$slave_flag" == "1" ]]; then
    slave_status=$(grep -Ei 'Slave_IO_Running|Slave_SQL_Running' <<<"$slave_output" \
      | grep -ci 'yes')
  else
    slave_status=2
  fi

  printf -v payload \
    '[{"metric":"mysqld_slavestatus","endpoint":"%s","timestamp":%s,"step":%s,"value":%s,"counterType":"GAUGE","tags":"port=%s"}]' \
    "$HOSTNAME" "$(date +%s)" "$step" "$slave_status" "$port"
  curl -s -X POST -d "$payload" http://127.0.0.1:1988/v1/push &>/dev/null
}
#######################################
# Collect and push metrics for every port listed in
# $path/${service}_portstatus ("<port> <service>" per line), one background
# job per port, then wait for all of them.
# Globals:   path, service (read)
# Returns:   1 if the port list cannot be read, otherwise the status of wait
#######################################
Main(){
  # Resolve the file name before `service` is shadowed by the loop variable.
  local port_file="$path/${service}_portstatus"
  local port service

  if [[ ! -r "$port_file" ]]; then
    printf 'Main: cannot read port list %s\n' "$port_file" >&2
    return 1
  fi

  while read -r port service || [[ -n "$port" ]]; do
    # A line whose first field is not a port number (for example " mysqld",
    # written when no port could be extracted) would otherwise be probed as
    # port "mysqld".
    [[ "$port" =~ ^[0-9]+$ ]] || continue
    {
      Test_slave_status
      #Test_port_status
      Test_app_alive
      # May exit this per-port job when the server is out of connections.
      Test_max_connection
      Get_current_value global_status
      Get_current_value slave_status
      Get_current_value global_variables
      Curl_falcon
    } &
  done < "$port_file"
  wait
}
# Script entry point: collect and push metrics for every discovered port.
Main