# 請關注微信公衆號:機器知心媛,每天進步一點點
import pandas as pd
import numpy as np
# Build a Series from a plain list. No index is passed here, so pandas
# creates an integer index running from 0 to N-1 automatically.
s = pd.Series(data=[1, 3, 6, np.nan, 44, 1])
print(s)
# Printed result:
# 0     1.0
# 1     3.0
# 2     6.0
# 3     NaN
# 4    44.0
# 5     1.0
# dtype: float64
# A DataFrame is a table-like data structure holding an ordered set of
# columns; it carries both a row index and a column index.
# Here the row index is six consecutive dates and the cells are random.
datas = pd.date_range(start='20160101', periods=6)
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=datas,
    columns=['a', 'b', 'c', 'd'],
)
print(df)
# Sample output (the values are random, so every run differs):
#                    a         b         c         d
# 2016-01-01  1.585208 -0.990413  1.236099 -0.388732
# 2016-01-02  0.495724  0.059505  0.416845 -0.181266
# 2016-01-03 -1.859331  0.071485 -0.665165  0.110948
# 2016-01-04  2.665616 -0.974616  0.047750 -0.623723
# 2016-01-05 -0.618240 -0.770843  0.779370 -1.378176
# 2016-01-06 -0.101552  1.311490 -0.665657  0.854078

# Selecting a single column by its label returns a Series.
print(df['b'])
# Sample output (captured from a different run than the table above,
# which is why the numbers do not match column b there):
# 2016-01-01   -2.038565
# 2016-01-02    0.378850
# 2016-01-03    0.354676
# 2016-01-04    0.096856
# 2016-01-05    2.455016
# 2016-01-06    0.702862
# Freq: D, Name: b, dtype: float64
# With neither index nor columns given, both axes get default
# integer labels starting at 0.
df1 = pd.DataFrame(np.arange(12).reshape(3, 4))
print(df1)
# Printed result:
#    0  1   2   3
# 0  0  1   2   3
# 1  4  5   6   7
# 2  8  9  10  11
# Build a DataFrame from a dict: each key becomes a column label and each
# value supplies that column's data. Scalars ('A', 'B', 'F') are repeated
# for every row; the Series, array and Categorical each hold four items.
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        "D": np.array([3, 3, 3, 3], dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)
print(df2)
# Sample output:
#      A          B    C  D      E    F
# 0  1.0 2013-01-02  1.0  3   test  foo
# 1  1.0 2013-01-02  1.0  3  train  foo
# 2  1.0 2013-01-02  1.0  3   test  foo
# 3  1.0 2013-01-02  1.0  3  train  foo

# Per-column data types.
print(df2.dtypes)
# Sample output (from the author's run):
# A           float64
# B    datetime64[ns]
# C           float32
# D             int32
# E          category
# F            object
# dtype: object

# Row labels.
print(df2.index)
# Sample output (from the author's run):
# Int64Index([0, 1, 2, 3], dtype='int64')

# Column labels.
print(df2.columns)
# Sample output:
# Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

# The underlying cell values as an array.
print(df2.values)
# Sample output:
# [[1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
#  [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']
#  [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
#  [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']]

# Summary statistics.
print(df2.describe())
# Sample output (from the author's run):
#          A    C    D
# count  4.0  4.0  4.0
# mean   1.0  1.0  3.0
# std    0.0  0.0  0.0
# min    1.0  1.0  3.0
# 25%    1.0  1.0  3.0
# 50%    1.0  1.0  3.0
# 75%    1.0  1.0  3.0
# max    1.0  1.0  3.0

# Transpose: rows become columns and vice versa.
print(df2.transpose())

# Sort by labels along the column axis, in descending order
# (so the columns come out as F, E, D, C, B, A).
print(df2.sort_index(axis="columns", ascending=False))
# Sample output:
#      F      E  D    C          B    A
# 0  foo   test  3  1.0 2013-01-02  1.0
# 1  foo  train  3  1.0 2013-01-02  1.0
# 2  foo   test  3  1.0 2013-01-02  1.0
# 3  foo  train  3  1.0 2013-01-02  1.0

# Sort the rows by the values in column 'B'.
print(df2.sort_values("B"))
# Sample output:
#      A          B    C  D      E    F
# 0  1.0 2013-01-02  1.0  3   test  foo
# 1  1.0 2013-01-02  1.0  3  train  foo
# 2  1.0 2013-01-02  1.0  3   test  foo
# 3  1.0 2013-01-02  1.0  3  train  foo
import numpy as np
import pandas as pd
# Selection demo: a 6x4 frame holding the integers 0..23, indexed by six
# consecutive dates and with columns A-D.
dates = pd.date_range('20190101', periods=6)
df = pd.DataFrame(
    np.arange(24).reshape((6, 4)),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)

# Select one column by label (df.A is the attribute-style equivalent).
print(df['A'])
# Sample output (from the author's run; the integer dtype shown may
# differ on other platforms):
# 2019-01-01     0
# 2019-01-02     4
# 2019-01-03     8
# 2019-01-04    12
# 2019-01-05    16
# 2019-01-06    20
# Freq: D, Name: A, dtype: int32

# Slice rows by position: the first three rows.
print(df[0:3])
#             A  B   C   D
# 2019-01-01  0  1   2   3
# 2019-01-02  4  5   6   7
# 2019-01-03  8  9  10  11

# Slice rows by index label: both end points are included.
print(df["20190102":"20190104"])
#              A   B   C   D
# 2019-01-02   4   5   6   7
# 2019-01-03   8   9  10  11
# 2019-01-04  12  13  14  15

# Select a single row by its label with .loc.
print(df.loc['20190101'])
# A    0
# B    1
# C    2
# D    3
# Name: 2019-01-01 00:00:00, dtype: int32

# All rows, columns 'A' through 'B' (label slices include the end label).
print(df.loc[:, 'A':'B'])
#              A   B
# 2019-01-01   0   1
# 2019-01-02   4   5
# 2019-01-03   8   9
# 2019-01-04  12  13
# 2019-01-05  16  17
# 2019-01-06  20  21

# One row label combined with a list of column labels.
print(df.loc['20190102', ['A', 'B']])
# A    4
# B    5

# Select by integer position with .iloc.
print(df.iloc[3, 1])  # 13
print(df.iloc[3:5, 1:3])
print(df.iloc[[1, 3, 5], 1:3])

# Mixed selection: the first three rows (by position) and columns 'A' and
# 'C' (by label). The original used df.ix, which no longer exists in
# current pandas; translating the positions into labels via df.index
# lets .loc perform the same selection.
print(df.loc[df.index[:3], ['A', 'C']])

# Filter rows with a boolean condition.
print(df[df.A > 8])
#              A   B   C   D
# 2019-01-04  12  13  14  15
# 2019-01-05  16  17  18  19
# 2019-01-06  20  21  22  23
# 最全的pandas基礎運算(上)
# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.