1
2
3
4
5
6
7
8
9
10
11
|
# vmstat 3 输出样例: procs -----------memory---------- ---swap-- -----io---- --system-- -----cpu------ r b swpd free buff cache si so bi bo in cs us sy id wa st 0 0 0 2540988 522188 5130400 0 0 2 32 4 2 4 1 96 0 0 1 0 0 2540988 522188 5130400 0 0 0 720 1199 665 1 0 99 0 0 0 0 0 2540956 522188 5130400 0 0 0 0 1151 1569 4 1 95 0 0 0 0 0 2540956 522188 5130500 0 0 0 6 1117 439 1 0 99 0 0 0 0 0 2540940 522188 5130512 0 0 0 536 1189 932 1 0 98 0 0 0 0 0 2538444 522188 5130588 0 0 0 0 1187 1417 4 1 96 0 0 0 0 0 2490060 522188 5130640 0 0 0 18 1253 1123 5 1 94 0 0 |
1
2
|
# uptime 18:02:41 up 41 days, 23:42, 1 user, load average: 0.00, 0.00, 0.00 |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
[root@testredis scripts] # cat performance.sh #!/bin/bash #监控cpu系统负载 IP=` ifconfig eth0 | grep "inet addr" | cut -f 2 -d ":" | cut -f 1 -d " " ` cpu_num=` grep -c 'model name' /proc/cpuinfo ` count_uptime=`uptime | wc -w` load_15=`uptime | awk '{print $' $count_uptime '}' ` average_load=` echo "scale=2;a=$load_15/$cpu_num;if(length(a)==scale(a)) print 0;print a" | bc ` average_int=` echo $average_load | cut -f 1 -d "." ` load_warn=0.70 if [ $average_int -gt 0 ] then echo "$IP服务器单个核心15分钟的平均负载为$average_load,超过警戒值1.0,请立即处理!!!$(date +%Y%m%d/%H:%M:%S)" >> /usr/monitor/performance/performance_ $( date +%Y%m%d).log echo "$IP服务器单个核心15分钟的平均负载为$average_load,超过警戒值1.0,请立即处理!!!$(date +%Y%m%d/%H:%M:%S)" | mail -s "$IP服务器系统负载严重告警" [email protected] else echo "$IP服务器单个核心15分钟的平均负载值为$average_load,负载正常 $(date +%Y%m%d/%H:%M:%S)" >> /usr/monitor/performance/performance_ $( date +%Y%m%d).log fi #监控cpu使用率 cpu_idle=` top -b -n 1 | grep Cpu | awk '{print $5}' | cut -f 1 -d "." ` if [ $cpu_idle -lt 20 ] then echo "$IP服务器cpu剩余$cpu_idle%,使用率已经超过80%,请及时处理。" >> /usr/monitor/performance/performance_ $( date +%Y%m%d).log else echo "$IP服务器cpu剩余$cpu_idle%,使用率正常" >> /usr/monitor/performance/performance_ $( date +%Y%m%d).log fi |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
[root@testredis scripts] # cat process.sh #!/bin/bash IP=` ifconfig eth0 | grep "inet addr" | cut -f 2 -d ":" | cut -f 1 -d " " ` tomcat_dir= "/opt/apache-tomcat-7.0.8" mysql_dir= "/usr/local/mysql/bin/mysqld_safe" vsftp_dir= "/usr/sbin/vsftpd" ssh_dir= "/usr/sbin/sshd" for dir in $tomcat_dir $mysql_dir $vsftp_dir $ssh_dir do process_count=$( ps -ef | grep "$dir" | grep - v grep | wc -l) for service in tomcat mysql vsftp ssh do echo "$dir" | grep -q "$service" if [ $? - eq 0 ] then if [ $process_count - eq 0 ] then echo "$service is down at $(date +%Y%m%d%H:%M:%S)" >> /usr/monitor/process/process_ $( date +%Y%m%d).log echo "$service is down at $(date +%Y%m%d%H:%M:%S)" | mail -s "$IP服务器 $service服务关闭告警" [email protected] else echo "$service is running at $(date +%Y%m%d%H:%M:%S)" >> /usr/monitor/process/process_ $( date +%Y%m%d).log fi else continue fi done done |
1
2
3
4
5
6
7
8
9
10
11
12
|
#!/bin/bash # R1=` cat /sys/class/net/eth0/statistics/rx_bytes ` T1=` cat /sys/class/net/eth0/statistics/tx_bytes ` sleep 1 R2=` cat /sys/class/net/eth0/statistics/rx_bytes ` T2=` cat /sys/class/net/eth0/statistics/tx_bytes ` TBPS=` expr $T2 - $T1` RBPS=` expr $R2 - $R1` TKBPS=` expr $TBPS / 1024` RKBPS=` expr $RBPS / 1024` echo "上传速率 eth0: $TKBPS kb/s 下载速率 eth0: $RKBPS kb/s at $(date +%Y%m%d%H:%M:%S)" >> /usr/monitor/network/network_ $( date +%Y%m%d).log |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
[root@testredis scripts] # cat tongji.sh #!/bin/bash TX=0; RX=0; MAX_TX=0; MAX_RX=0; while read line do a=` echo $line | grep "eth0" | awk '{print $3}' ` if [ $a - ge 0 ] then TX=$a if [ $TX - ge $MAX_TX ] then MAX_TX=$TX fi fi b=` echo $line | grep "eth0" | awk '{print $7}' ` if [ $b - ge 0 ] then RX=$b if [ $RX - ge $MAX_RX ] then MAX_RX=$RX fi fi done < /usr/monitor/network/network_ $( date +%Y%m%d).log echo "最高上传速度为 $MAX_TX kb/s at $(date +%Y%m%d)" >> /usr/monitor/network/tongji .log echo "最高下载速度为 $MAX_RX kb/s at $(date +%Y%m%d)" >> /usr/monitor/network/tongji .log |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
[root@Test scripts] # cat sys-warning.sh #!/bin/bash #监控系统负载与CPU、内存、硬盘、登录用户数,超出警戒值则发邮件告警。 #提取本服务器的IP地址信息 IP=` ifconfig eth0 | grep "inter addr" | cut -f 2 -d ":" | cut -f 1 -d " " ` # 1、监控系统负载的变化情况,超出时发邮件告警: #抓取cpu的总核数 cpu_num=` cat /proc/cpuinfo | grep -c "model name" ` #抓取当前系统15分钟的平均负载值 load_15=`uptime | awk '{print $12}' ` #计算当前系统单个核心15分钟的平均负载值,结果小于1.0时前面个位数补0。 average_load=` echo "scale=2;a=$load_15/$cpu_num;if(length(a)==scale(a)) print 0;print a" | bc ` #取上面平均负载值的个位整数 average_int=` echo $average_load | cut -f 1 -d "." ` #设置系统单个核心15分钟的平均负载的告警值为0.70(即使用超过70%的时候告警)。 load_warn=0.70 #当单个核心15分钟的平均负载值大于等于1.0(即个位整数大于0) ,直接发邮件告警;如果小于1.0则进行二次比较 if [ $average_int > 0 ]; then echo "$IP服务器单个核心15分钟的系统平均负载为$average_load,超过警戒值1.0,请立即处理." | mutt -s "$IP 服务器系统负载严重告警." [email protected] else #当前系统15分钟平均负载值与告警值进行比较(当大于告警值0.70时会返回1,小于时会返回0) load_now=` expr $average_load \> $load_warn` #如果系统单个核心15分钟的平均负载值大于告警值0.70(返回值为1),则发邮件给管理员 if [ $load_now == 1 ]; then echo "$IP服务器单个核心15分钟的系统平均负载为$average_load,超过警戒值0.70,请及时处理." | mutt -s "$IP 服务器系统负载告警" [email protected] fi fi # 2、监控系统cpu的情况,当使用超过80%的时候发告警邮件: #取当前空闲cpu百份比值(只取整数部分) cpu_idle=` top -b -n 1 | grep Cpu | awk '{print $5}' | cut -f 1 -d "." ` #设置空闲cpu的告警值为20%,如果当前cpu使用超过80%(即剩余小于20%),立即发邮件告警 if (($cpu_idle < 20)); then fi # 3、监控系统交换分区swap的情况,当使用超过80%的时候发告警邮件: #系统分配的交换分区总量 swap_total=` free -m | grep Swap | awk '{print $2}' ` #当前剩余的交换分区free大小 swap_free=` free -m | grep Swap | awk '{print $4}' ` #当前已使用的交换分区used大小 swap_used=` free -m | grep Swap | awk '{print $3}' ` if (($swap_used != 0)); then #如果交换分区已被使用,则计算当前剩余交换分区free所占总量的百分比,用小数来表示,要在小数点前面补一个整数位0 swap_per=0` echo "scale=2;$swap_free/$swap_total" | bc ` #设置交换分区的告警值为20%(即使用超过80%的时候告警)。 swap_warn=0.20 #当前剩余交换分区百分比与告警值进行比较(当大于告警值(即剩余20%以上)时会返回1,小于(即剩余不足20%)时会返回0 ) swap_now=` expr $swap_per \> $swap_warn` #如果当前交换分区使用超过80%(即剩余小于20%,上面的返回值等于0),立即发邮件告警 if (($swap_now == 0)); then echo "$IP服务器swap交换分区只剩下 $swap_free M 未使用,剩余不足20%,使用率已经超过80%,请及时处理。" | mutt -s "$IP 服务器内存告警" | [email protected] fi fi # 4、监控系统硬盘根分区使用的情况,当使用超过80%的时候发告警邮件: #取当前根分区(/dev/sda3)已用的百份比值(只取整数部分) disk_sda3=` df -h | grep /dev/sda3 | awk '{print $5}' | cut -f 1 -d "%" ` #设置空闲硬盘容量的告警值为80%,如果当前硬盘使用超过80%,立即发邮件告警 if (($disk_sda3 > 80)); then fi #5、监控系统用户登录的情况,当用户数超过3个的时候发告警邮件: #取当前用户登录数(只取数值部分) users =`uptime | awk '{print $6}' ` #设置登录用户数的告警值为3个,如果当前用户数超过3个,立即发邮件告警 if (($ users >= 3)); then fi |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
|
[root@Test scripts] # cat check_linux.sh #!/bin/bash os_check() { if [ -e /etc/redhat-release ]; then REDHAT=` cat /etc/redhat-release | cut -d ' ' -f1` else DEBIAN=` cat /etc/issue | cut -d ' ' -f1` fi if [ "$REDHAT" == "CentOS" -o "$REDHAT" == "Red" ]; then P_M=yum elif [ "$DEBIAN" == "Ubuntu" -o "$DEBIAN" == "ubutnu" ]; then P_M=apt-get else Operating system does not support. exit 1 fi } if [ $LOGNAME != root ]; then echo "Please use the root account operation." exit 1 fi if ! which vmstat &> /dev/null ; then echo "vmstat command not found, now the install." sleep 1 os_check $P_M install procps -y echo "-----------------------------------------------------------------------" fi if ! which iostat &> /dev/null ; then echo "iostat command not found, now the install." sleep 1 os_check $P_M install sysstat -y echo "-----------------------------------------------------------------------" fi while true ; do select input in cpu_load disk_load disk_use disk_inode mem_use tcp_status cpu_top10 mem_top10 traffic quit; do case $input in cpu_load) #CPU利用率与负载 echo "---------------------------------------" i=1 while [[ $i - le 3 ]]; do echo -e "\033[32m 参考值${i}\033[0m" UTIL=`vmstat | awk '{if(NR==3)print 100-$15"%"}' ` USER=`vmstat | awk '{if(NR==3)print $13"%"}' ` SYS=`vmstat | awk '{if(NR==3)print $14"%"}' ` IOWAIT=`vmstat | awk '{if(NR==3)print $16"%"}' ` echo "Util: $UTIL" echo "User use: $USER" echo "System use: $SYS" echo "I/O wait: $IOWAIT" i=$(($i+1)) sleep 1 done echo "---------------------------------------" break ;; disk_load) #硬盘I/O负载 echo "---------------------------------------" i=1 while [[ $i - le 3 ]]; do echo -e "\033[32m 参考值${i}\033[0m" UTIL=`iostat -x -k | awk '/^[v|s]/{OFS=": ";print $1,$NF"%"}' ` READ=`iostat -x -k | awk '/^[v|s]/{OFS=": ";print $1,$6"KB"}' ` WRITE=`iostat -x -k | awk '/^[v|s]/{OFS=": ";print $1,$7"KB"}' ` IOWAIT=`vmstat | awk '{if(NR==3)print $16"%"}' ` echo -e "Util:" echo -e "${UTIL}" echo -e "I/O Wait: $IOWAIT" echo -e "Read/s:\n$READ" echo -e "Write/s:\n$WRITE" i=$(($i+1)) sleep 1 done echo "---------------------------------------" break ;; disk_use) #硬盘利用率 DISK_LOG= /tmp/disk_use .tmp DISK_TOTAL=` fdisk -l | awk '/^Disk.*bytes/&&/\/dev/{printf $2" ";printf "%d",$3;print "GB"}' ` USE_RATE=` df -h | awk '/^\/dev/{print int($5)}' ` for i in $USE_RATE; do if [ $i -gt 90 ]; then PART=` df -h | awk '{if(int($5)==' '' $i '' ') print $6}' ` echo "$PART = ${i}%" >> $DISK_LOG fi done echo "---------------------------------------" echo -e "Disk total:\n${DISK_TOTAL}" if [ -f $DISK_LOG ]; then echo "---------------------------------------" cat $DISK_LOG echo "---------------------------------------" rm -f $DISK_LOG else echo "---------------------------------------" echo "Disk use rate no than 90% of the partition." echo "---------------------------------------" fi break ;; disk_inode) #硬盘inode利用率 INODE_LOG= /tmp/inode_use .tmp INODE_USE=` df -i | awk '/^\/dev/{print int($5)}' ` for i in $INODE_USE; do if [ $i -gt 90 ]; then PART=` df -h | awk '{if(int($5)==' '' $i '' ') print $6}' ` echo "$PART = ${i}%" >> $INODE_LOG fi done if [ -f $INODE_LOG ]; then echo "---------------------------------------" rm -f $INODE_LOG else echo "---------------------------------------" echo "Inode use rate no than 90% of the partition." echo "---------------------------------------" fi break ;; mem_use) #内存利用率 echo "---------------------------------------" MEM_TOTAL=` free -m | awk '{if(NR==2)printf "%.1f",$2/1024}END{print "G"}' ` USE=` free -m | awk '{if(NR==3) printf "%.1f",$3/1024}END{print "G"}' ` FREE=` free -m | awk '{if(NR==3) printf "%.1f",$4/1024}END{print "G"}' ` CACHE=` free -m | awk '{if(NR==2) printf "%.1f",($6+$7)/1024}END{print "G"}' ` echo -e "Total: $MEM_TOTAL" echo -e "Use: $USE" echo -e "Free: $FREE" echo -e "Cache: $CACHE" echo "---------------------------------------" break ;; tcp_status) #网络连接状态 echo "---------------------------------------" COUNT=` netstat -antp | awk '{status[$6]++}END{for(i in status) print i,status[i]}' ` echo -e "TCP connection status:\n$COUNT" echo "---------------------------------------" ;; cpu_top10) #占用CPU高的前10个进程 echo "---------------------------------------" CPU_LOG= /tmp/cpu_top .tmp i=1 while [[ $i - le 3 ]]; do #ps aux |awk '{if($3>0.1)print "CPU: "$3"% -->",$11,$12,$13,$14,$15,$16,"(PID:"$2")" |"sort -k2 -nr |head -n 10"}' > $CPU_LOG ps aux | awk '{if($3>0.1){{printf "PID: "$2" CPU: "$3"% --> "}for(i=11;i<=NF;i++)if(i==NF)printf $i"\n";else printf $i}}' | sort -k4 -nr | head -10 > $CPU_LOG #循环从11列(进程名)开始打印,如果i等于最后一行,就打印i的列并换行,否则就打印i的列 if [[ -n ` cat $CPU_LOG` ]]; then echo -e "\033[32m 参考值${i}\033[0m" cat $CPU_LOG > $CPU_LOG else echo "No process using the CPU." break fi i=$(($i+1)) sleep 1 done echo "---------------------------------------" break ;; mem_top10) #占用内存高的前10个进程 echo "---------------------------------------" MEM_LOG= /tmp/mem_top .tmp i=1 while [[ $i - le 3 ]]; do #ps aux |awk '{if($4>0.1)print "Memory: "$4"% -->",$11,$12,$13,$14,$15,$16,"(PID:"$2")" |"sort -k2 -nr |head -n 10"}' > $MEM_LOG ps aux | awk '{if($4>0.1){{printf "PID: "$2" Memory: "$3"% --> "}for(i=11;i<=NF;i++)if(i==NF)printf $i"\n";else printf $i}}' | sort -k4 -nr | head -10 > $MEM_LOG if [[ -n ` cat $MEM_LOG` ]]; then echo -e "\033[32m 参考值${i}\033[0m" cat $MEM_LOG > $MEM_LOG else echo "No process using the Memory." break fi i=$(($i+1)) sleep 1 done echo "---------------------------------------" break ;; traffic) #查看网络流量 while true ; do read -p "Please enter the network card name(eth[0-9] or em[0-9]): " eth #if [[ $eth =~ ^eth[0-9]$ ]] || [[ $eth =~ ^em[0-9]$ ]] && [[ `ifconfig |grep -c "\<$eth\>"` -eq 1 ]]; then if [ ` ifconfig | grep -c "\<$eth\>" ` - eq 1 ]; then break else echo "Input format error or Don't have the card name, please input again." fi done echo "---------------------------------------" echo -e " In ------ Out" i=1 while [[ $i - le 3 ]]; do #OLD_IN=`ifconfig $eth |awk '/RX bytes/{print $2}' |cut -d: -f2` #OLD_OUT=`ifconfig $eth |awk '/RX bytes/{print $6}' |cut -d: -f2` OLD_IN=` ifconfig $eth | awk -F '[: ]+' '/bytes/{if(NR==8)print $4;else if(NR==5)print $6}' ` #CentOS6和CentOS7 ifconfig输出进出流量信息位置不同,CentOS6中RX与TX行号等于8,CentOS7中RX行号是5,TX行号是5,所以就做了个判断. OLD_OUT=` ifconfig $eth | awk -F '[: ]+' '/bytes/{if(NR==8)print $9;else if(NR==7)print $6}' ` sleep 1 NEW_IN=` ifconfig $eth | awk -F '[: ]+' '/bytes/{if(NR==8)print $4;else if(NR==5)print $6}' ` NEW_OUT=` ifconfig $eth | awk -F '[: ]+' '/bytes/{if(NR==8)print $9;else if(NR==7)print $6}' ` IN=` awk 'BEGIN{printf "%.1f\n",' $((${NEW_IN}-${OLD_IN})) '/1024/128}' ` OUT=` awk 'BEGIN{printf "%.1f\n",' $((${NEW_OUT}-${OLD_OUT})) '/1024/128}' ` echo "${IN}MB/s ${OUT}MB/s" i=$(($i+1)) sleep 1 done echo "---------------------------------------" break ;; quit) exit 0 ;; *) echo "---------------------------------------" echo "Please enter the number." echo "---------------------------------------" break ;; esac done done |