1、安装
pip install pandas
2、数据读取
import pandas
# Load the CSV file into a DataFrame.
food_info = pandas.read_csv('food_info.csv')

# What did we get back, and which dtype does each column have?
# Note the attribute is `dtypes` (plural); text columns show up as `object`.
print(type(food_info))
print(food_info.dtypes)
# print(help(pandas.read_csv))
# print(food_info)

# Peek at both ends of the table; the header line is not counted as a row.
first_rows = food_info.head(n=3)
print(first_rows)
last_row = food_info.tail(n=1)
print(last_row)

# Column labels, then the (rows, columns) shape, e.g. (24, 4) -- header excluded.
column_labels = food_info.columns
print(column_labels)
print(food_info.shape)

# Row labelled 0, i.e. the first data row under the default integer index.
print(food_info.loc[0])

# A single column comes back as a Series. To select several columns pass a
# list of names instead: food_info[["A", "B"]].
number_column = food_info['NUMBER']
print(number_column)

# The column labels as a plain Python list.
print(column_labels.tolist())
3、数据处理
import pandas
import numpy
# Reload the data set for the processing examples.
food_info = pandas.read_csv('food_info.csv')

# Arithmetic on a column is element-wise: every value is divided by 10.
print(food_info['NUMBER'] / 10)

# sort_values() sorts ascending by default. With inplace=True the rows of
# food_info itself are reordered; with inplace=False a sorted copy is returned
# and the original is left untouched.
food_info.sort_values('WATER', inplace=True)
print(food_info['WATER'])
food_info.sort_values('WATER', inplace=True, ascending=False)  # descending
print(food_info['WATER'])

# Find the missing values in the SUGER column.
suger = food_info['SUGER']
suger_is_null = pandas.isnull(suger)  # boolean mask, True where the value is missing
# print(suger_is_null)
suger_null = suger[suger_is_null]  # boolean indexing keeps only the True positions
print(suger_null)
print(len(suger_null))
print(food_info['SUGER'].mean())  # mean() skips missing values automatically

# Pivot table: group the rows by CLASS (e.g. 1, 2, 3) and aggregate RESULT
# within each group, e.g. (1 + 0 + 1) / 3 for a class with three rows.
# The aggregation is given as the string "mean" (which is also the default):
# passing the numpy.mean callable is deprecated in recent pandas releases and
# emits a FutureWarning.
result_ok = food_info.pivot_table(index="CLASS", values="RESULT", aggfunc="mean")
print(result_ok)  # e.g. 1 0.588235, 2 0.250000, 3 0.666667

# Drop every ROW (axis=0) that has a missing value in NAME or SUGER.
food = food_info.dropna(axis=0, subset=['NAME', 'SUGER'])
print(food.head(5))

# Sorting and dropping leave the old index labels in place; reset_index
# renumbers the rows from 0, and drop=True discards the old labels.
food_index = food.reset_index(drop=True)
print(food_index.head(5))
# DataFrame.apply(func) runs a function over the data, row by row or column by column.
def which_null(row):
    """Return the row's NAME value, or 'Unknown' when that value is missing.

    Args:
        row: A mapping-like object (for example one DataFrame row passed in
            by ``DataFrame.apply(..., axis=1)``) that has a 'NAME' entry.

    Returns:
        The 'NAME' value unchanged, or the string 'Unknown' if
        ``pandas.isnull`` reports it as missing (None / NaN).
    """
    name = row['NAME']
    return 'Unknown' if pandas.isnull(name) else name
# Call which_null once per row: axis="columns" (the same as axis=1) hands each
# row to the function, and the results are collected into a new Series.
classes = food_info.apply(which_null, axis="columns")
print(classes)
4、series类型
import pandas as pd
from pandas import Series
# Reload the data set for the Series examples.
food_info = pd.read_csv('food_info.csv')

# A Series is the one-dimensional building block of a DataFrame: selecting a
# single column (or a single row) of a DataFrame yields a Series.
series_name = food_info['NAME']
print(type(series_name))

# Build a Series by hand: the labels come from the NAME column and the
# values from the SUGER column.
suger_values = food_info['SUGER'].values
ser = pd.Series(data=suger_values, index=series_name)
print(ser)

# Selecting the column gives a Series ...
suger_column = food_info['SUGER']
print(suger_column)
# ... while .values exposes the underlying array (a NumPy ndarray for an
# ordinary numeric column), not a Python list.
print(suger_column.values)