Pandas常用操作
import pandas as pd
import numpy as np
1.讀取csv文件
csv_file = pd.read_csv('test.csv')
csv_file = pd.read_csv('test.csv',header=None)
csv_file = pd.read_csv('test.csv',names=['a','b','c','d','e'])
csv_file = pd.read_csv('test.csv',index_col=0)
csv_file = pd.read_csv('test.csv',usecols=[0,1,3])
csv_file = pd.read_csv('test.csv',dtype={'col_name1':object,'col_name2': np.float64})
csv_file = pd.read_csv('test.csv',sep='\t')
csv_file = pd.read_csv('test.csv',na_values='str')
2.寫csv文件
csv_file.to_csv('result.csv')
csv_file.to_csv('result.csv',index=False)
csv_file.to_csv('result.csv',columns=[0,1,3])
csv_file.to_csv('result.csv',header=None)
csv_file.to_csv('result.csv',sep='\t')
csv_file.to_csv('result.csv',na_rep='str')
3.DataFrame與Numpy格式的轉換
np_values = df_values.values
df_values = pd.DataFrame(np_values)
df_values = pd.DataFrame(np_values,columns=[1,2,3,4,5])
df_values = pd.DataFrame(np_values,index=[i for i in range(100)])
4.DataFrame數據的創建
df=pd.DataFrame(columns=[],index=[],data=[])
data = {'水果':['蘋果','梨','草莓'],
'數量':[3,2,5],
'價格':[10,9,8]}
df = pd.DataFrame(data)
data = {'數量':{'蘋果':3,'梨':2,'草莓':5},
'價格':{'蘋果':10,'梨':9,'草莓':8}}
df = pd.DataFrame(data)
data = {'水果':pd.Series(['蘋果','梨','草莓']),
'數量':pd.Series([3,2,5]),
'價格':pd.Series([10,9,8])}
df = pd.DataFrame(data)
5.DataFrame數據的統計性描述
df_values.describe()
df_values.describe(include=['object'])
df_values.describe(include='all')
df_values.info()
df_values.dtypes
6.DataFrame數據的查看
df_values.head()
df_values.head(n=10)
df_values.tail()
df_values.columns
df_values.index
7.DataFrame的切片操作
df_values['column_name']
df_values[df_values.columns[index]]
df_values.loc[index]
df_values.loc[[index],['a','b']]
df_values.loc[[index],'a':'b']
df_values.iloc[0:10,0:10]
df_values.iloc[[0,5,10],[1,8,10]]
df_values[df_values.A>0]
df_values[df_values['A'].isin(['one','two'])]
df_values['A']=np.array([1]*len(df_values))
df_values.loc[:,['a','c']]=[]
8.相關的操作(排序、合併)
df_values.sort_index(axis=1,ascending=False)
df_values.sort_values(by='column_Name',ascending=True)
pd.concat([df1[:],df2[:],...],axis=0)
pd.concat([df1,df2,...],axis=1)
pd.concat([df_values,df1[:]],ignore_index=True)  # DataFrame.append was removed in pandas 2.0; use pd.concat