1、安裝
pip install pandas
2、數據讀取
import pandas
food_info = pandas.read_csv('food_info.csv') # load the CSV file into a DataFrame
print(type(food_info)) # pandas DataFrame
print(food_info.dtypes) # per-column dtypes (note the plural); e.g. NAME is object, the dtype used for strings
#print(help(pandas.read_csv))
#print(food_info)
print(food_info.head(3)) # first three data rows (the header is not counted)
print(food_info.tail(1)) # counted from the end: the last data row
print(food_info.columns) # the column labels
print(food_info.shape) # (rows, columns), header excluded; (24, 4) in the original notes
print(food_info.loc[0]) # row with index label 0, i.e. the first data row under the default index
# Select a single column by name. For several columns pass a list of names,
# e.g. food_info[["NAME", "NUMBER"]] (note the double brackets).
print(food_info["NUMBER"])
print(food_info.columns.tolist()) # column labels as a plain Python list
3、數據處理
import pandas
import numpy
food_info = pandas.read_csv('food_info.csv')
print(food_info['NUMBER'] / 10) # element-wise: every value in the column divided by 10
# sort_values sorts ascending by default. inplace=True reorders food_info itself;
# inplace=False (the default) returns a new sorted DataFrame and leaves the
# original untouched.
food_info.sort_values('WATER', inplace=True)
print(food_info['WATER'])
food_info.sort_values('WATER', inplace=True,ascending=False) # descending order
print(food_info['WATER'])
suger = food_info['SUGER'] # take the SUGER column
suger_is_null = pandas.isnull(suger) # boolean mask, True where the value is missing, e.g. [False, False, True, ...]
#print(suger_is_null)
suger_null = suger[suger_is_null] # boolean indexing: keep only the entries whose mask is True
print(suger_null)
print(len(suger_null)) # number of missing values
print(food_info['SUGER'].mean()) # mean of the column; missing values are skipped automatically
# Pivot table: group the rows by CLASS and aggregate RESULT within each group.
# Example from the original notes: a class whose RESULT values are 1, 0 and 1
# averages to (1 + 0 + 1) / 3.
# The aggregation is named by the string "mean" (which is also the default);
# passing the numpy.mean callable raises a FutureWarning on pandas >= 2.1.
result_ok = food_info.pivot_table(index="CLASS", values="RESULT", aggfunc="mean")
print(result_ok) # sample output from the original notes: 1 0.588235 2 0.250000 3 0.666667
# Drop every row (axis=0) that has a missing value in NAME or SUGER.
food = food_info.dropna(axis=0, subset=['NAME', 'SUGER'])
print(food.head(5))
food_index = food.reset_index(drop=True) # discard the old index labels and renumber from 0
print(food_index.head(5))
# food_info.apply(函數:對數據處理)
def which_null(row):
    """Return the row's NAME value, or 'Unknown' when that value is missing.

    Written for ``DataFrame.apply(which_null, axis=1)``, where ``row`` is one
    row of the frame; any mapping with a ``'NAME'`` key works as well.
    """
    name = row['NAME']
    if pandas.isnull(name):
        return 'Unknown'
    return name
classes = food_info.apply(which_null, axis=1) # axis=1: call which_null once per row
print(classes)
4、series類型
import pandas as pd
from pandas import Series
food_info = pd.read_csv('food_info.csv')
# A Series is the one-dimensional building block of a DataFrame: selecting a
# single column (or a single row) yields a Series, whereas a selection spanning
# several rows and columns is still a DataFrame.
series_name = food_info['NAME']
print(type(series_name))
# Build a new Series whose index labels are the NAME values and whose data are
# the SUGER values.
ser = Series(food_info['SUGER'].values, index=food_info['NAME'])
print(ser)
print(food_info['SUGER']) # a pandas Series
print(food_info['SUGER'].values) # the underlying array (a numpy.ndarray for ordinary dtypes), not a Python list