>>> import numpy as np
>>> import pandas as pd
>>> s = pd.Series(np.random.rand(4))  # 數據結構類似於字典
>>> print(s, type(s))
0    0.011276
1    0.518487
2    0.404968
3    0.912360
dtype: float64 <class 'pandas.core.series.Series'>
>>> dic = {'a':1,'b':2,'c':3,'4':4,'5':5}  # 可由字典創建
>>> s = pd.Series(dic)
>>> print(s)
4    4
5    5
a    1
b    2
c    3
dtype: int64
>>> s2 = pd.Series(np.random.randn(5), name='test')  # 可命名
>>> print(s2.name)
test
>>> s3 = s2.rename('valid')  # 重命名
>>> print(s3.name)
valid
>>> print(s2.name)  # 傳值非傳址
test
2 二維數組DataFrame
>>> data = {'name':['A','B','C'],'age':[1,2,3],'gender':['m','m','m']}
>>> excel = pd.DataFrame(data)  # 由數組/列表組成的字典創建這個表格式數據結構
>>> print(excel, type(excel))
   age gender name
0    1      m    A
1    2      m    B
2    3      m    C <class 'pandas.core.frame.DataFrame'>
>>> data = {'one':pd.Series(np.random.rand(2), index=['a','b']),'two':pd.Series(np.random.rand(3), index=['a','b','c'])}
>>> excel = pd.DataFrame(data)  # 由一維數組Series創建
>>> print(excel, type(excel))
        one       two
a  0.521976  0.286897
b  0.434287  0.681197
c       NaN  0.100178 <class 'pandas.core.frame.DataFrame'>
>>> ar = np.random.rand(9).reshape(3,3)
>>> df = pd.DataFrame(ar, index=['a','b','c'], columns=['one','two','three'])  # 由二維數組創建,可以指定行/列標籤
>>> print(df, type(df))
        one       two     three
a  0.533896  0.158577  0.201476
b  0.877298  0.451443  0.643094
c  0.012091  0.569678  0.778727 <class 'pandas.core.frame.DataFrame'>
3 索引與切片
>>> df = pd.DataFrame(np.random.rand(12).reshape(3,4), index=['one','two','three'], columns=['a','b','c','d'])
>>> data1 = df['a']  # 列一維索引,輸出Series
>>> print(data1, type(data1))
one      0.681222
two      0.948255
three    0.244360
Name: a, dtype: float64 <class 'pandas.core.series.Series'>
>>> data2 = df[['a','c']]  # 列二維索引,輸出DataFrame
>>> print(data2, type(data2))
              a         c
one    0.681222  0.510396
two    0.948255  0.288504
three  0.244360  0.802351 <class 'pandas.core.frame.DataFrame'>
>>> data3 = df.loc['one']  # 行一維索引,輸出Series
>>> print(data3, type(data3))
a    0.681222
b    0.402983
c    0.510396
d    0.855539
Name: one, dtype: float64 <class 'pandas.core.series.Series'>
>>> data4 = df.loc[['one','two']]  # 行二維索引,輸出DataFrame
>>> print(data4, type(data4))
            a         b         c         d
one  0.681222  0.402983  0.510396  0.855539
two  0.948255  0.090073  0.288504  0.984931 <class 'pandas.core.frame.DataFrame'>
>>> print(df.iloc[-1])  # 單行索引
a 0.244360
b 0.375580
c 0.802351
d 0.299126
Name: three, dtype: float64
>>> print(df.iloc[[2,1]])  # 多行索引
              a         b         c         d
three  0.244360  0.375580  0.802351  0.299126
two    0.948255  0.090073  0.288504  0.984931
>>> print(df.iloc[::2])  # 切片索引
              a         b         c         d
one    0.681222  0.402983  0.510396  0.855539
three  0.244360  0.375580  0.802351  0.299126
>>> print(df['a'].loc[['one','three']])  # 行列同時索引
one 0.681222
three 0.244360
Name: a, dtype: float64
>>> print(df[['b','c','d']].iloc[::2])  # 行列同時索引
              b         c         d
one    0.402983  0.510396  0.855539
three  0.375580  0.802351  0.299126