根據train.csv文件的屬性找到其完整路徑,再用pandas的read_csv把數據讀取出來
import numpy as np
import pandas as pd
# Load the training set into a DataFrame. The location is an absolute,
# machine-specific Windows path, so the file has to exist there for this to run.
data = pd.read_csv(
    filepath_or_buffer=r'C:\Users\Administrator\Desktop\train.csv',
)
# Bare expression: has no effect in a plain script; presumably kept so the
# frame is displayed when the code is run interactively (e.g. in a notebook).
data
查看是否存在空值,並統計每列空值的個數
# Number of missing values in each column (isna performs the same check as
# isnull; summing the boolean mask counts the True entries per column).
data.isna().sum()
用isnull判斷Age中的空值,結果爲True表示該值爲空
用value_counts找出Age的衆數,再用衆數填充Age中的空值
# Frequency of each known age (value_counts leaves missing values out by
# default); the most frequent value is the mode.
data['Age'].value_counts()
# Boolean mask over the rows: True where Age is missing.
data['Age'].isnull()
# Fill the missing ages with the mode, computed from the data rather than
# typed in by hand as a literal. If several ages tie for most frequent,
# mode() returns them sorted and the smallest one is used.
# The result is assigned back to the column on purpose: calling
# fillna(..., inplace=True) on `data.Age` operates on a temporary Series,
# which pandas warns about (chained assignment) and which leaves `data`
# unchanged once Copy-on-Write is enabled.
age_modes = data['Age'].mode()
if not age_modes.empty:  # empty only when every Age value is missing
    data['Age'] = data['Age'].fillna(age_modes.iloc[0])
統計出男女比例
# Share of each sex: per-value counts divided by the total number of rows
# in the column.
data['Sex'].value_counts().div(len(data['Sex']))
統計出存活與死亡的比例
# Share of each Survived value: per-value counts divided by the total number
# of rows in the column.
data['Survived'].value_counts().div(len(data['Survived']))
找出已知年齡最大最小值
# Largest and smallest value in the Age column.
data['Age'].max()
data['Age'].min()
畫出男女存活死亡圖
import matplotlib.pyplot as plt
# Count the Survived values separately for male and female rows.
smale = data.loc[data['Sex'] == 'male', 'Survived'].value_counts()
sfemale = data.loc[data['Sex'] == 'female', 'Survived'].value_counts()

# One column per sex, indexed by the Survived value, drawn as a grouped
# bar chart.
df = pd.DataFrame({u'male': smale, u'female': sfemale})
df.plot.bar()

# Label the axes and title the figure, then display it.
plt.xlabel(u'survived')
plt.ylabel(u'number')
plt.title(u"Bar Chart of Men and Women's Survival and Death ")
plt.show()
將數據存成csv格式文件
# Wrap the data in a DataFrame and write it out as CSV to an absolute,
# machine-specific Windows path. The row index is written too, because
# to_csv's index option is left at its default.
data1 = pd.DataFrame(data=data)
data1.to_csv(path_or_buf=r'C:\Users\Administrator\Desktop\data1.csv')
# Bare expression: has no effect in a plain script; presumably kept so the
# frame is displayed when the code is run interactively (e.g. in a notebook).
data1