# coding=utf-8
from pandas import read_csv
from pandas import datetime
def parser(x):
    """Parse a 'year month day hour' string into a datetime.

    Passed to ``read_csv`` as ``date_parser`` together with the combined
    year/month/day/hour ``parse_dates`` group.

    Args:
        x: Text in ``'%Y %m %d %H'`` form, e.g. ``'2010 1 2 0'``.

    Returns:
        The corresponding naive ``datetime.datetime``.

    Raises:
        ValueError: If ``x`` does not match the expected format.
    """
    # Use the standard-library class directly: ``pandas.datetime`` was only a
    # re-export of it and has been removed from recent pandas releases.
    from datetime import datetime
    return datetime.strptime(x, '%Y %m %d %H')
# Load the raw data; the year/month/day/hour columns are combined and parsed
# by ``parser``, and the first resulting column becomes the index.
dataset = read_csv(
    'data_set/air_pollution.csv',
    parse_dates=[['year', 'month', 'day', 'hour']],
    index_col=0,
    date_parser=parser,
)
# Drop the 'No' column (axis=1 selects columns rather than rows).
dataset = dataset.drop('No', axis=1)
# Assign a label to every remaining column by hand.
dataset.columns = ['pollution', 'dew', 'temp', 'press', 'wnd_dir', 'wnd_spd', 'snow', 'rain']
dataset.index.name = 'date'
# Replace NA pollution values with 0. The result is assigned back instead of
# calling fillna(inplace=True) on the selected column, because that chained
# form may act on a temporary copy and leave ``dataset`` unchanged.
dataset['pollution'] = dataset['pollution'].fillna(0)
# Discard the first 24 rows.
dataset = dataset.iloc[24:]
# print(...) with a single argument is valid on both Python 2 and Python 3.
print(dataset.head())
# Save the processed data.
dataset.to_csv('data_set/air_pollution_new.csv')
# 數據輸出 (data output: plot the processed data)
# coding=utf-8
#輸出數據曲線
#------------
from pandas import read_csv
from matplotlib import pyplot
# Load the processed CSV; the first row is the header and the first column
# is the index.
dataset = read_csv('data_set/air_pollution_new.csv', header=0, index_col=0)
values = dataset.values
# Column positions to plot: 0..7 except 4, which holds strings.
groups = [col for col in range(8) if col != 4]
# Draw one subplot per selected column, stacked vertically in one figure.
pyplot.figure()
for i, group in enumerate(groups, start=1):
    # Grid of len(groups) rows by 1 column; draw into the i-th subplot.
    pyplot.subplot(len(groups), 1, i)
    pyplot.plot(values[:, group])
    pyplot.title(dataset.columns[group], y=0.5, loc='right')
pyplot.show()