最近發現 Python 可以做很多事情,在監控服務器方面有其獨特的優勢:耗費資源少,開發週期短。
首先我們做一個定時或者實時腳本 timedtask.py,讓其定時監控目標服務器,有兩種方式:
第一種:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time, os
from monitorserver import alltask
def roll_back(cmd, inc=60):
    """Run the monitoring tasks forever, pausing between rounds.

    Args:
        cmd: Accepted for the caller's benefit but never read by the body.
        inc: Seconds to sleep after each round (default 60).

    This function never returns.
    """
    keep_running = True
    while keep_running:
        # One round: alltask() launches every monitor defined in monitorserver.
        alltask()
        time.sleep(inc)


# Start polling with a 5-second pause between rounds.
roll_back("echo %time%", 5)
第二種:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time, os
def roll_back(cmd, inc=60):
    """Run the monitoring script as a child process forever.

    Args:
        cmd: Accepted for the caller's benefit but never read by the body.
        inc: Seconds to sleep after each run (default 60).

    This function never returns.
    """
    # Location of the monitoring script; adjust the path to the real install.
    monitor_command = 'python /home/../monitorserver.py'
    keep_running = True
    while keep_running:
        os.system(monitor_command)
        time.sleep(inc)


# Start polling with a 5-second pause between runs.
roll_back("echo %time%", 5)
做過監控的人應該都知道,我們主要監控服務器的系統負載、磁盤、內存、CPU、網絡接口(流量)和端口。針對這些指標,我做了以下遠程監控;第一種和第二種方式使用的監控代碼相同,monitorserver.py 的代碼如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pexpect
import re
import time
import threading
"""
主方法
127.0.0.1#遠程服務器ip地址
"""
def ssh_command(user, host, password, command):
    """Start ``ssh`` for one remote command and answer its password prompt.

    Args:
        user: Login name passed to ``ssh -l``.
        host: Address of the remote server.
        password: Password sent when ssh asks for one.
        command: Command line to execute on the remote host.

    Returns:
        The ``pexpect.spawn`` child after the password has been sent, so the
        caller can wait for ``pexpect.EOF`` and read ``child.before``.
        ``None`` if no password prompt appeared before pexpect timed out.
    """
    ssh_new_key = 'Are you sure you want to continue connecting'
    # Pass an argument list so pexpect does not re-split ``command``; quoting
    # inside it (e.g. grep "cpu ") then reaches the remote shell intact.
    child = pexpect.spawn('ssh', ['-l', user, host, command])
    prompts = [pexpect.TIMEOUT, 'password: ', ssh_new_key]
    i = child.expect(prompts)
    if i == 2:
        # Unknown host key: accept it, then wait exactly once for the password
        # prompt. Waiting a second time would always time out because the
        # prompt has already been consumed.
        child.sendline('yes')
        i = child.expect(prompts[:2])
    if i == 0:
        print('ERROR!')
        print('SSH could not login. Here is what SSH said:')
        print('%s %s' % (child.before, child.after))
        # Do not leave the failed ssh process behind.
        child.close()
        return None
    child.sendline(password)
    return child
"""
內存監控
"""
def mem_info(user="遠程服務器用戶名", host="127.0.0.1", password="遠程服務器密碼"):
    """Print memory and swap usage read from the remote ``/proc/meminfo``.

    Args:
        user: SSH login name (the default is a placeholder to replace).
        host: Address of the server to inspect.
        password: SSH password (the default is a placeholder to replace).

    Returns:
        None. The report is written to standard output; nothing is printed
        by this function if the SSH login fails or the output is unusable
        beyond a short error line.
    """
    child = ssh_command(user, host, password, "cat /proc/meminfo")
    if child is None:
        # ssh_command has already reported the login failure.
        return
    child.expect(pexpect.EOF)
    # Look fields up by name rather than by position, so extra or reordered
    # lines in /proc/meminfo cannot shift the values.
    meminfo = dict(
        (name, int(kb))
        for name, kb in re.findall(r"(\S+):\s+(\d+) kB", child.before)
    )
    mem_total = meminfo.get("MemTotal", 0)
    if not mem_total:
        print('ERROR! MemTotal not found in /proc/meminfo output')
        return
    mem_free = meminfo.get("MemFree", 0)
    buffers = meminfo.get("Buffers", 0)
    cached = meminfo.get("Cached", 0)
    swap_cached = meminfo.get("SwapCached", 0)
    swap_total = meminfo.get("SwapTotal", 0)
    swap_free = meminfo.get("SwapFree", 0)

    print('******************************內存監控*********************************')
    print("*******************時間: %s ******************"
          % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    print("總內存: %s" % mem_total)
    print("空閒內存: %s" % mem_free)
    print("給文件的緩衝大小: %s" % buffers)
    print("高速緩衝存儲器使用的大小: %s" % cached)
    print("被高速緩衝存儲用的交換空間大小: %s" % swap_cached)
    if swap_total == 0:
        print(u"交換內存總共爲:0")
    else:
        rate_swap = 100 - 100 * swap_free / float(swap_total)
        print(u"交換內存利用率: %s" % rate_swap)
    # Buffers and page cache are counted as available memory.
    free_mem = mem_free + buffers + cached
    used_mem = mem_total - free_mem
    rate_mem = 100 * used_mem / float(mem_total)
    print(u"內存利用率: %.2f %%" % rate_mem)
"""
內核線程、虛擬內存、磁盤、陷阱和 CPU 活動的統計信息
"""
def vm_stat_info(user="遠程服務器用戶名", host="127.0.0.1", password="遠程服務器密碼"):
    """Print the columns of the last line of a remote ``vmstat 1 2`` run.

    Args:
        user: SSH login name (the default is a placeholder to replace).
        host: Address of the server to inspect.
        password: SSH password (the default is a placeholder to replace).

    Returns:
        None. One labelled line per vmstat column is written to standard
        output, in the order vmstat prints them.
    """
    child = ssh_command(user, host, password, "vmstat 1 2 | tail -n 1")
    if child is None:
        # ssh_command has already reported the login failure.
        return
    child.expect(pexpect.EOF)
    lines = child.before.strip().splitlines()
    fields = lines[-1].split() if lines else []

    # Labels in vmstat column order:
    # r b swpd free buff cache si so bi bo in cs us sy id wa st
    labels = (
        "等待運行進程的數量:",
        "處於不間斷狀態的進程:",
        "使用虛擬內存(swap)的總量:",
        "空閒的內存總量:",
        "用作緩衝的內存總量:",
        "用作緩存的內存總量:",
        "交換出內存總量 :",
        "交換入內存總量 :",
        "從一個塊設備接收:",
        "發送到塊設備:",
        "每秒的中斷數:",
        "每秒的上下文切換數:",
        "用戶空間上進程運行的時間百分比:",
        "內核空間上進程運行的時間百分比:",
        "閒置時間百分比:",
        "等待IO的時間百分比:",
        "從虛擬機偷取的時間百分比:",
    )
    print('****************************內核線程、虛擬內存、磁盤、陷阱和 CPU 活動的統計信息監控****************************')
    print("*******************時間: %s ******************"
          % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    # zip() stops at the shorter sequence, so a vmstat that prints fewer
    # columns (for example no trailing "st") is reported without an IndexError.
    for label, value in zip(labels, fields):
        print("%s %s" % (label, value))
'''
cpu監控
'''
def cpu_info(user="遠程服務器用戶名", host="127.0.0.1", password="遠程服務器密碼"):
    """Print the processor count and per-processor details from ``/proc/cpuinfo``.

    Args:
        user: SSH login name (the default is a placeholder to replace).
        host: Address of the server to inspect.
        password: SSH password (the default is a placeholder to replace).

    Returns:
        None. The report is written to standard output.
    """
    child = ssh_command(user, host, password, "cat /proc/cpuinfo")
    if child is None:
        # ssh_command has already reported the login failure.
        return
    child.expect(pexpect.EOF)

    # One dict per processor. Each "processor" line starts a fresh dict so
    # entries do not alias one another.
    processors = []
    for line in child.before.splitlines():
        key, sep, value = line.partition(':')
        key = key.strip()
        if not sep or not key:
            continue
        if key == 'processor':
            processors.append({})
        if processors:
            processors[-1][key] = value.strip()

    print('***************************************cpu監控***************************************')
    print("*******************時間: %s ******************"
          % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    print(u"CPU數目: %s" % len(processors))
    for proc in processors:
        # .get() keeps the report going when a field is absent from the output.
        print("CPU屬於的名字及其編號、標稱主頻: %s" % proc.get('model name', ''))
        print("CPU屬於其系列中的哪一代的代號: %s" % proc.get('model', ''))
        print("CPU製造商: %s" % proc.get('vendor_id', ''))
        print("CPU產品系列代號: %s" % proc.get('cpu family', ''))
        print("CPU的實際使用主頻: %s" % proc.get('cpu MHz', ''))
"""
Load average monitoring (reads /proc/loadavg)
"""
def load_stat(user="遠程服務器用戶名", host="127.0.0.1", password="遠程服務器密碼"):
    """Print the load averages read from the remote ``/proc/loadavg``.

    The first three fields of ``/proc/loadavg`` are the 1-, 5- and 15-minute
    load averages; the labels printed here say so.

    Args:
        user: SSH login name (the default is a placeholder to replace).
        host: Address of the server to inspect.
        password: SSH password (the default is a placeholder to replace).

    Returns:
        None. The report is written to standard output.
    """
    child = ssh_command(user, host, password, "cat /proc/loadavg")
    if child is None:
        # ssh_command has already reported the login failure.
        return
    child.expect(pexpect.EOF)
    loadavgs = child.before.strip().split()
    if len(loadavgs) < 5:
        print('ERROR! unexpected /proc/loadavg output: %r' % child.before)
        return
    print('************************平均負載監控****************************')
    print("*******************時間: %s ******************"
          % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    print("系統最近1分鐘的平均負載: %s" % loadavgs[0])
    print("系統最近5分鐘的平均負載: %s" % loadavgs[1])
    print("系統最近15分鐘的平均負載: %s" % loadavgs[2])
    print("分子是正在運行的進程數,分母爲總進程數: %s" % loadavgs[3])
    print("最近運行的進程id: %s" % loadavgs[4])
"""
獲取網絡接口的輸入和輸出
"""
def ionetwork(user="遠程服務器用戶名", host="127.0.0.1", password="遠程服務器密碼"):
    """Print per-interface traffic totals read from ``/proc/net/dev``.

    For every interface the first counter (received bytes) and the ninth
    counter (transmitted bytes) are divided by 1024*1024 and rounded to two
    decimals, then the resulting ``{interface: {'Receive', 'Transmit'}}``
    dict is printed.

    Args:
        user: SSH login name (the default is a placeholder to replace).
        host: Address of the server to inspect.
        password: SSH password (the default is a placeholder to replace).

    Returns:
        None. The report is written to standard output.
    """
    child = ssh_command(user, host, password, "cat /proc/net/dev")
    if child is None:
        # ssh_command has already reported the login failure.
        return
    child.expect(pexpect.EOF)
    print('************************獲取網絡接口的輸入和輸出監控****************************')
    print("*******************時間: %s ******************"
          % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    mib = 1024.0 * 1024.0
    net = {}
    for line in child.before.splitlines():
        name, sep, counters = line.partition(':')
        values = counters.split()
        # Header lines and stray output have no "name: <numbers>" shape;
        # skip them instead of failing on an index or float conversion.
        if not sep or len(values) < 9:
            continue
        if not (values[0].isdigit() and values[8].isdigit()):
            continue
        net[name.strip()] = {
            'Receive': round(float(values[0]) / mib, 2),
            'Transmit': round(float(values[8]) / mib, 2),
        }
    print(net)
"""
磁盤空間監控
"""
def disk_stat(user="遠程服務器用戶名", host="127.0.0.1", password="遠程服務器密碼"):
    """Print one line of disk usage per filesystem reported by remote ``df``.

    Args:
        user: SSH login name (the default is a placeholder to replace).
        host: Address of the server to inspect.
        password: SSH password (the default is a placeholder to replace).

    Returns:
        None. The report is written to standard output.
    """
    # -P asks df for the POSIX layout: one line per filesystem, so a long
    # device name is not wrapped onto a second line.
    child = ssh_command(user, host, password, "df -hP")
    if child is None:
        # ssh_command has already reported the login failure.
        return
    child.expect(pexpect.EOF)
    rows = [line.split() for line in child.before.strip().splitlines()]
    print('************************磁盤空間監控****************************')
    print("*******************時間: %s ******************"
          % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    # rows[0] is the df header line.
    for row in rows[1:]:
        if len(row) < 6:
            # Not a complete filesystem row; skip rather than raise IndexError.
            continue
        # A mount point containing spaces is split into several fields.
        mount_point = " ".join(row[5:])
        print("\t文件系統: %s \t容量: %s \t已用: %s \t可用: %s \t已用%%: %s \t掛載點: %s"
              % (row[0], row[1], row[2], row[3], row[4], mount_point))
"""
端口監控
一般是遠程服務器用戶名用戶
"""
def getComStr(user="遠程服務器用戶名", host="127.0.0.1", password="遠程服務器密碼"):
    """Print the raw output of ``netstat -tpln`` run on the remote server.

    Args:
        user: SSH login name (the default is a placeholder to replace).
            NOTE(review): netstat's -p column is presumably only complete
            for a privileged user; confirm which account is intended.
        host: Address of the server to inspect.
        password: SSH password (the default is a placeholder to replace).

    Returns:
        None. The report is written to standard output.
    """
    child = ssh_command(user, host, password, "netstat -tpln")
    if child is None:
        # ssh_command has already reported the login failure.
        return
    child.expect(pexpect.EOF)
    listening = child.before
    print('******************************端口監控*********************************')
    print("*******************時間: %s ******************"
          % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    print(listening)
"""
CPU usage, computed from two samples of the aggregate "cpu" line in /proc/stat
"""
def cpu(user="遠程服務器用戶名", host="127.0.0.1", password="遠程服務器密碼"):
    """Print CPU utilisation computed from two samples of ``/proc/stat``.

    The aggregate ``cpu`` line is read twice over separate SSH sessions; the
    utilisation is the non-idle share of the ticks elapsed between the two.

    Args:
        user: SSH login name (the default is a placeholder to replace).
        host: Address of the server to inspect.
        password: SSH password (the default is a placeholder to replace).

    Returns:
        None. The report is written to standard output.
    """
    command = 'cat /proc/stat | grep "cpu "'
    children = []
    for _ in range(2):
        child = ssh_command(user, host, password, command)
        if child is None:
            # ssh_command has already reported the login failure.
            return
        child.expect(pexpect.EOF)
        children.append(child)

    samples = []
    for child in children:
        counters = None
        for line in child.before.splitlines():
            fields = line.split()
            # Select the aggregate line explicitly; per-core lines start
            # with "cpu0", "cpu1", ... and must not be picked up.
            if fields and fields[0] == 'cpu':
                counters = [int(value) for value in fields[1:]]
                break
        if not counters or len(counters) < 4:
            print('ERROR! no aggregate cpu line in /proc/stat output')
            return
        samples.append(counters)
    first, second = samples

    print('************************cpu使用情況****************************')
    print("*******************時間: %s ******************"
          % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    # Total = user + nice + system + idle + iowait + irq + softirq + steal.
    # The guest columns that follow are already included in user/nice.
    total_first = sum(first[:8])
    total_second = sum(second[:8])
    elapsed = total_second - total_first
    idle = second[3] - first[3]
    print('總的cpu時間1: %s' % total_first)
    print('總的cpu時間2: %s' % total_second)
    print('時間間隔內的所有時間片: %s' % elapsed)
    print('計算空閒時間idle: %s' % idle)
    # No ticks elapsed between the samples: report 0 instead of dividing by 0.
    usage = 100.0 * (elapsed - idle) / elapsed if elapsed > 0 else 0.0
    print("計算cpu使用率: %.2f %%" % usage)
"""
第一種執行
"""
def alltask():
    """Run every monitor once, each in its own thread, and wait for them.

    The monitors are mem_info, vm_stat_info, cpu_info, load_stat, ionetwork,
    disk_stat, getComStr and cpu. Waiting for the threads keeps one round
    from overlapping the next when this function is called in a loop.

    Returns:
        None. An exception raised while creating or starting a thread is
        printed, not propagated.
    """
    monitors = (
        mem_info,
        vm_stat_info,
        cpu_info,
        load_stat,
        ionetwork,
        disk_stat,
        getComStr,
        cpu,
    )
    started = []
    try:
        for monitor in monitors:
            worker = threading.Thread(target=monitor)
            worker.start()
            started.append(worker)
    except Exception as e:
        print(str(e))
    # Join only threads that actually started; joining an unstarted thread
    # raises RuntimeError.
    for worker in started:
        worker.join()
"""
第二種執行
"""
if __name__ == '__main__':
    # Running the file directly performs one monitoring round. alltask()
    # starts the same eight monitor threads this block used to build by hand.
    alltask()
接下來要做的是把監控結果可視化……