# 讀取文檔 / Reading the data file
# Load the food nutrition table from a CSV file in the working directory.
# pandas is imported here because nothing visible earlier in this file binds
# the name `pandas` (the later import only binds the alias `pd`).
import pandas

food_info = pandas.read_csv("food_info.csv")
# read_csv returns a DataFrame; show its type and the dtype of each column.
print(type(food_info))
print(food_info.dtypes)
# 基本操作 / Basic operations
# Show the read_csv documentation. help() prints the text itself and returns
# None, so it is not wrapped in print() (that would emit a stray "None").
help(pandas.read_csv)

# Quick looks at the table. Outside an interactive session a bare expression
# displays nothing, so each result is printed explicitly.
print(food_info.head(3))   # first 3 rows
print(food_info.tail(5))   # last 5 rows
print(food_info.columns)   # column labels
print(food_info.shape)     # (number of rows, number of columns)

# Row selection by index label with .loc.
print(food_info.loc[0])            # a single row
print(food_info.loc[3:6])          # label slice; both end points are included
print(food_info.loc[[2, 5, 10]])   # an explicit list of labels

# Column selection: one name yields a Series, a list of names a DataFrame.
ndb_col = food_info["NDB_No"]
print(ndb_col)
columns = ["Zinc_(mg)", "Copper_(mg)"]
zinc_copper = food_info[columns]
print(zinc_copper)

# Keep only the columns whose name ends in "(g)" and preview them.
col_names = food_info.columns.tolist()
gram_columns = [name for name in col_names if name.endswith("(g)")]
gram_df = food_info[gram_columns]
print(gram_df.head(3))

# Arithmetic on columns is element-wise, row by row.
water_enger = food_info["Water_(g)"] * food_info["Energ_Kcal"]
iron_grams = food_info["Iron_(mg)"] / 1000  # milligrams -> grams
# Assigning to a new key adds the result to the table as a new column.
food_info["Iron_(g)"] = iron_grams

# Sort the table in place by the sodium column: ascending (the default)
# first, then descending.
food_info.sort_values("Sodium_(mg)", inplace=True)
print(food_info["Sodium_(mg)"])
food_info.sort_values("Sodium_(mg)", inplace=True, ascending=False)
# 觀察屬性 / Inspecting a column
import pandas as pd
import numpy as np
# Load the Titanic training data from the working directory.
titanic_survival = pd.read_csv("titanic_train.csv")
# head is a method: it must be called, and outside an interactive session its
# result must be printed, otherwise the statement has no visible effect.
print(titanic_survival.head())

# Count the rows whose "Age" value is missing.
age = titanic_survival["Age"]
age_is_null = pd.isnull(age)       # boolean mask, True where Age is missing
age_null_true = age[age_is_null]   # only the entries with a missing age
age_null_count = len(age_null_true)
# 清除缺失值 / Excluding missing values when computing the mean
# Naive mean over every entry of the column. The built-in sum() does not skip
# missing values, so if any age is NaN the result is NaN as well.
all_ages = titanic_survival["Age"]
mean_age = sum(all_ages) / len(all_ages)
print(mean_age)

# Manual fix: drop the entries flagged as missing before averaging.
good_ages = all_ages[~age_is_null]
correct_mean_age = sum(good_ages) / len(good_ages)
print(correct_mean_age)

# Same figure via Series.mean(), which skips NaN by default.
correct_mean_age = all_ages.mean()
print(correct_mean_age)
# 求某個屬性值的平均值、和 / Mean and sum of a column, per group
# Mean fare for each passenger class, computed by filtering the table once
# per class value.
passenger_classes = [1, 2, 3]
fares_by_class = {}
for this_class in passenger_classes:
    pclass_rows = titanic_survival[titanic_survival["Pclass"] == this_class]
    pclass_fares = pclass_rows["Fare"]
    fare_for_class = pclass_fares.mean()
    fares_by_class[this_class] = fare_for_class
print(fares_by_class)

# Total fare and total "Survived" per embarkation port. The aggregation is
# named by the string "sum": it selects the same sum as the NumPy callable
# np.sum, without the FutureWarning that newer pandas releases emit for it.
port_stats = titanic_survival.pivot_table(
    index="Embarked",
    values=["Fare", "Survived"],
    aggfunc="sum",
)
print(port_stats)

# With no aggfunc given, pivot_table falls back to its default, the mean:
# this is the average age per embarkation port.
passenger_age = titanic_survival.pivot_table(index="Embarked", values="Age")
print(passenger_age)
# 去除缺失值 / Dropping missing values
# Keep only the columns that contain no missing value at all.
drop_na_columns = titanic_survival.dropna(axis="columns")
# Keep only the rows in which both "Age" and "Sex" are present; missing
# values in other columns do not cause a row to be dropped.
new_titanic_survival = titanic_survival.dropna(
    axis="index",
    subset=["Age", "Sex"],
)
# 定位 / Looking up single values by label
# Scalar lookups addressed by (row label, column name).
row_index_83_age = titanic_survival.at[83, "Age"]
# NOTE(review): the variable name says 1000 but the row label used is 766 --
# confirm which one is intended.
row_index_1000_pclass = titanic_survival.at[766, "Pclass"]
print(row_index_83_age)
print(row_index_1000_pclass)
# 排序 / Sorting
# Order the rows by "Age", largest first. sort_values returns a new frame and
# the rows keep their original index labels.
new_titanic_survival = titanic_survival.sort_values(by="Age", ascending=False)
# Renumber the sorted rows 0..n-1; drop=True discards the old labels instead
# of keeping them as an extra column.
titanic_reindex = new_titanic_survival.reset_index(drop=True)