import pandas
import numpy as np
from pandas import Series , DataFrame
# Build a Series from a plain list; the NaN entry is stored as a missing value
# and the result has a floating-point dtype.
s = Series(data=[1, 2, 3, np.nan, 5, 1])
print(s)
# Output:
# 0 1.0
# 1 2.0
# 2 3.0
# 3 NaN
# 4 5.0
# 5 1.0
# dtype: float64
# numpy.random.randn():以給定的形狀創建一個數組,數組元素服從標準正態分佈 N(0,1)
# Draw 10 samples with numpy.random.randn and wrap them in a named Series.
a = np.random.randn(10)
s = Series(data=a, name='Series 1')
print(s)
# Sample output (the values are random and differ on every run):
# 0 1.285564
# 1 -0.189391
# 2 -1.730828
# 3 -1.972853
# 4 0.104808
# 5 -0.225108
# 6 1.045197
# 7 -1.531798
# 8 -1.420962
# 9 -2.423736
# Name: Series 1, dtype: float64
# Build a Series from a dict: the keys become the index labels and the
# values become the data.
d = dict(a=1, b=2, c=3)
s = Series(data=d, name='Series from dict')
print(s)
# Output:
# a 1
# b 2
# c 3
# Name: Series from dict, dtype: int64
# A scalar value is repeated for every label of the supplied index.
s = Series(1.5, index=list('abcdefg'))
print(s)
# Output:
# a 1.5
# b 1.5
# c 1.5
# d 1.5
# e 1.5
# f 1.5
# g 1.5
# dtype: float64
# Build a Series from a dict with an explicit index: the index fixes the
# label order, and a label that is absent from the dict ('d') gets NaN.
d = dict(a=1, b=2, c=3)
s = Series(data=d, index=['a', 'c', 'd', 'b'], name='Seris from dict')
print(s)
# Output:
# a 1.0
# c 3.0
# d NaN
# b 2.0
# Name: Seris from dict, dtype: float64
# Build a DataFrame from a dict of equal-length lists: each key is a column
# name, and the explicit index supplies the row labels.
d = {
    'c_one': [1, 2, 3, 4],
    'c_two': [4, 3, 2, 1],
}
df = DataFrame(data=d, index=['id1', 'id2', 'id3', 'id4'])
print(df)
# Output:
#     c_one c_two
# id1 1 4
# id2 2 3
# id3 3 2
# id4 4 1
# Build a DataFrame from a dict of Series with different indexes: the row
# index covers the labels of both Series, and 'one' has no value for 'd'.
d = {}
d['one'] = Series([1, 2, 3], index=['a', 'b', 'c'])
d['two'] = Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
df = DataFrame(d)
# Show the frame, then its row labels, column labels and raw value array.
for view in (df, df.index, df.columns, df.values):
    print(view)
# Output:
#   one two
# a 1.0 1
# b 2.0 2
# c 3.0 3
# d NaN 4
# Index(['a', 'b', 'c', 'd'], dtype='object')
# Index(['one', 'two'], dtype='object')
# [[ 1. 1.]
# [ 2. 2.]
# [ 3. 3.]
# [nan 4.]]
# Build a DataFrame from a 2x5 array holding 0..9, with explicit row and
# column labels.
grid = np.arange(10).reshape(2, 5)
d = DataFrame(
    grid,
    index=['i1', 'i2'],
    columns=['c1', 'c2', 'c3', 'c4', 'c5'],
)
print(d)
# Output:
#    c1 c2 c3 c4 c5
# i1 0 1 2 3 4
# i2 5 6 7 8 9
# Frame used by the sorting examples: two integer columns that run in
# opposite directions, with rows labelled id1..id4.
row_labels = ['id1', 'id2', 'id3', 'id4']
d = dict(c_one=[1, 2, 3, 4], c_two=[4, 3, 2, 1])
df = DataFrame(d, index=row_labels)
print(df)
# Output:
#     c_one c_two
# id1 1 4
# id2 2 3
# id3 3 2
# id4 4 1
# Sort the rows by their index labels (axis=0), in descending order.
by_label_desc = df.sort_index(axis=0, ascending=False)
print(by_label_desc)
# Sort the rows by the values of one column, ascending; once per column.
for sort_column in ('c_two', 'c_one'):
    print(df.sort_values(by=sort_column))
# Indexing demo: a 6x4 frame holding the integers 0..23, indexed by six
# consecutive daily timestamps starting 2020-01-01, with columns A-D.
dates = pandas.date_range('20200101', periods=6)
df = DataFrame(np.arange(24).reshape(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)
# Select column A (attribute access and bracket access give the same column)
print(df.A)
print(df['A'])
# Select the first three rows
print(df[0:3])
# Select the first three columns
print(df[['A', 'B', 'C']])
# Select by column name, then by row label
print(df['A']['2020-01-02'])
# NOTE: the old `.ix` indexer has been removed from pandas and cannot be used.
# iloc ("index locate") selects by integer position, so its arguments are
# integers, e.g. df.iloc[10:20, 3:5]
# loc selects by index (row) labels and column names
print(df.loc['2020-01-03'])  # one row, by label
print(df.loc[:, ['A', 'C']])  # all rows, columns A and C
print(df.loc['2020-01-01', ['A', 'D']])  # columns A and D of the 2020-01-01 row
print(df.iloc[0, 0])  # by position: first row, first column
print(df.iloc[[1, 3], 1])  # 2nd and 4th rows of the 2nd column
print(df[df.B > 5])  # rows whose B value is greater than 5
# Plotting with pandas (pyplot is used only to display the figure)
from matplotlib import pyplot as plt
# 4x5 table of absolute values of numpy.random.randn samples.
magnitudes = np.abs(np.random.randn(4, 5))
df = DataFrame(
    magnitudes,
    index=['bj', 'sh', 'hz', 'sz'],
    columns=['St', 'Doc', 'Tea', 'Dri', 'Tra'],
)
df.plot(kind='bar')  # kind (str) selects the chart type; 'bar' is a bar chart
plt.show()  # display the figure
# Pandas - Series、DataFrame、plot(demo)
# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.