爬取公司網絡流量去年到今年的數據,對數據進行統計處理

每天的數據差不多是這樣的:

wKiom1f6C-SB44tlAADYKNZbbTE645.png-wh_50


抓取網頁數據,進行保存:

import urllib
import time
import calendar
# Years to fetch, and the month numbers the 2015 branch slices from.
year_list = [2016]
month_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

# urlretrieve lives in ``urllib`` on Python 2 and ``urllib.request`` on Python 3.
try:
    from urllib import urlretrieve
except ImportError:
    from urllib.request import urlretrieve

# NOTE(review): the account name and password are embedded in this query
# string; move them to configuration before sharing or committing the script.
API_URL = "https://myview.chinanetcenter.com/api/bandwidth-channel.action?u=howbuy&p=Howbuy123&date="

# Pause between requests, in seconds.
SECONDS_BETWEEN_REQUESTS = 5


def date_string(year, month, day):
    """Return the date formatted as zero-padded ``YYYY-MM-DD``."""
    return "%04d-%02d-%02d" % (year, month, day)


def days_in_month(year, month, first_day=1):
    """Return the day numbers from *first_day* through the month's last day."""
    last_day = calendar.monthrange(year, month)[1]
    return range(first_day, last_day + 1)


def iter_downloads(years):
    """Yield ``(datestring, output_dir)`` for every daily report to fetch.

    For 2015 every day from July through December is produced.  For any
    other year only 7 June through the end of June is produced, and the
    output folder is the hard-coded 2016 folder.
    """
    for year in years:
        if year == 2015:
            for month in month_list[6:]:
                for day in days_in_month(year, month):
                    yield date_string(year, month, day), "E:\\xml\\2015\\"  #modify path
        else:
            for day in days_in_month(year, 6, first_day=7):
                yield date_string(year, 6, day), "E:\\xml\\2016\\"  #modify path


def main():
    """Download one XML report per scheduled day, pausing between requests."""
    for datestring, out_dir in iter_downloads(year_list):
        urlretrieve(API_URL + datestring, filename=out_dir + datestring + ".xml")
        time.sleep(SECONDS_BETWEEN_REQUESTS)


if __name__ == "__main__":
    main()


統計每天數據的平均值、最大值、最小值,並對持續大於130的值進行統計處理,最後生成xls表格:

import xml.dom.minidom
import urllib
import time
import calendar
import os
import xlrd
import xlwt
from xlwt import *
XML_ROOT = "E:\\zh\\"  #modify path
XLS_PATH = 'E:\\mini6.xls'  #modify path


def day_label(filename):
    """Return *filename* with a single trailing ``.xml`` suffix removed.

    ``str.strip(".xml")`` removes any of the characters ``.``, ``x``, ``m``
    and ``l`` from both ends, so it would also eat leading or trailing
    letters of the name itself; only the suffix is dropped here.
    """
    suffix = ".xml"
    if filename.endswith(suffix):
        return filename[:-len(suffix)]
    return filename


def bandwidth_values(dom):
    """Return the text of every ``<bandwidth>`` element in *dom* as floats."""
    bands = dom.documentElement.getElementsByTagName('bandwidth')
    return [float(band.firstChild.data) for band in bands]


def summarize(values):
    """Return ``(maximum, average)`` of *values*, or ``None`` when empty."""
    if not values:
        return None
    return max(values), sum(values) / len(values)


def collect_daily_stats(root_dir):
    """Walk *root_dir* and return one ``(label, max, average)`` row per file.

    Files are opened relative to the directory ``os.walk`` reports, so files
    in subfolders are found as well.  Files without any ``<bandwidth>``
    values are skipped instead of failing on an empty list.
    """
    rows = []
    for path, dirnames, filenames in os.walk(root_dir):
        print(filenames)
        print(path)
        print(dirnames)
        for filename in filenames:
            dom = xml.dom.minidom.parse(os.path.join(path, filename))
            stats = summarize(bandwidth_values(dom))
            if stats is None:
                continue
            vmax, average = stats
            rows.append((day_label(filename), vmax, average))
    return rows


def write_stats_xls(rows, xls_path):
    """Write *rows* to sheet ``tongji`` of a new workbook saved at *xls_path*."""
    book = xlwt.Workbook()
    sheet = book.add_sheet('tongji')
    for row_index, (daydate, daymax, dayavg) in enumerate(rows):
        sheet.write(row_index, 0, daydate)
        sheet.write(row_index, 1, daymax)
        sheet.write(row_index, 2, dayavg)
    book.save(xls_path)


def main():
    """Collect the per-day statistics and save them once, after all files."""
    rows = collect_daily_stats(XML_ROOT)
    # No workbook is produced when no data was found.
    if rows:
        write_stats_xls(rows, XLS_PATH)


if __name__ == "__main__":
    main()
import xml.dom.minidom
import urllib
import time
import calendar
import os
import xlrd
import xlwt
from xlwt import *
XML_ROOT = "E:\\zh\\2016\\"  #modify path
XLS_PATH = 'E:\\mini14.xls'  #modify path

# A sheet in the .xls format holds at most this many rows.
MAX_XLS_ROWS = 65536


def day_label(filename):
    """Return *filename* with a single trailing ``.xml`` suffix removed.

    ``str.strip(".xml")`` removes any of the characters ``.``, ``x``, ``m``
    and ``l`` from both ends, so it would also eat leading or trailing
    letters of the name itself; only the suffix is dropped here.
    """
    suffix = ".xml"
    if filename.endswith(suffix):
        return filename[:-len(suffix)]
    return filename


def bandwidth_values(dom):
    """Return the text of every ``<bandwidth>`` element in *dom* as floats."""
    bands = dom.documentElement.getElementsByTagName('bandwidth')
    return [float(band.firstChild.data) for band in bands]


def sample_rows(filename, values):
    """Return one ``(label, value)`` row per sample of a single file."""
    label = day_label(filename)
    return [(label, value) for value in values]


def sheet_position(index, rows_per_sheet=MAX_XLS_ROWS):
    """Map a flat row *index* to ``(sheet_number, row_in_sheet)``."""
    return divmod(index, rows_per_sheet)


def collect_samples(root_dir):
    """Walk *root_dir* and return a ``(label, value)`` row for every sample.

    Files are opened relative to the directory ``os.walk`` reports, so files
    in subfolders are found as well.
    """
    rows = []
    for path, _dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            dom = xml.dom.minidom.parse(os.path.join(path, filename))
            rows.extend(sample_rows(filename, bandwidth_values(dom)))
    return rows


def write_samples_xls(rows, xls_path):
    """Write *rows* to a new workbook saved at *xls_path*.

    Rows go to sheet ``tongji``; once a sheet is full the following rows
    continue on ``tongji2``, ``tongji3`` and so on.
    """
    book = xlwt.Workbook()
    sheets = [book.add_sheet('tongji')]
    for index, (daydate, daycount) in enumerate(rows):
        sheet_no, row_no = sheet_position(index)
        if sheet_no == len(sheets):
            sheets.append(book.add_sheet('tongji%d' % (sheet_no + 1)))
        sheets[sheet_no].write(row_no, 0, daydate)
        sheets[sheet_no].write(row_no, 1, daycount)
    book.save(xls_path)


def main():
    """Collect every sample and save the workbook once, after all files."""
    rows = collect_samples(XML_ROOT)
    # No workbook is produced when no samples were found.
    if rows:
        write_samples_xls(rows, XLS_PATH)


if __name__ == "__main__":
    main()

wKioL1f6DRbQSxUfAADpiAgWbDs678.png-wh_50

wKioL1f6DVfiAn36AADpiAgWbDs385.png-wh_50

wKiom1f6DViA12X4AAD9Tt31TII497.png-wh_50

wKiom1f6DXbjxjAHAAEq4bsyifk521.png-wh_50


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章