第4章 變形
# Load the sample dataset used throughout this chapter.
import numpy as np
import pandas as pd

df = pd.read_csv('data/table.csv')
df.head()
School
Class
ID
Gender
Address
Height
Weight
Math
Physics
0
S_1
C_1
1101
M
street_1
173
63
34.0
A+
1
S_1
C_1
1102
F
street_2
192
73
32.5
B+
2
S_1
C_1
1103
M
street_2
186
82
87.2
B+
3
S_1
C_1
1104
F
street_2
167
81
80.4
B-
4
S_1
C_1
1105
F
street_4
159
64
84.8
B+
一、透視表
1. pivot
一般狀態下,數據在DataFrame會以壓縮(stacked)狀態存放,例如上面的Gender,兩個類別被疊在一列中,pivot函數可將某一列作爲新的cols:
# pivot: spread one column's categories into new columns.
df.pivot(index='ID', columns='Gender', values='Height').head()
df.pivot(index='ID', columns='Class', values='Physics').head()
Class
C_1
C_2
C_3
C_4
ID
1101
A+
NaN
NaN
NaN
1102
B+
NaN
NaN
NaN
1103
B+
NaN
NaN
NaN
1104
B-
NaN
NaN
NaN
1105
B+
NaN
NaN
NaN
然而pivot函數具有很強的侷限性,除了功能上較少之外,還不允許values中出現重複的行列索引對(pair),例如下面的語句就會報錯:
因此,更多的時候會選擇使用強大的pivot_table函數
2. pivot_table
首先,再現上面的操作:
df. pivot_table( index= 'ID' , columns= 'Gender' , values= 'Height' ) . head( )
Gender
F
M
ID
1101
NaN
173.0
1102
192.0
NaN
1103
NaN
186.0
1104
167.0
NaN
1105
159.0
NaN
df. pivot_table( index= 'School' , columns= 'Gender' , values= 'Height' )
Gender
F
M
School
S_1
173.125000
178.714286
S_2
173.727273
172.000000
由於功能更多,速度上自然是比不上原來的pivot函數:
% timeit df. pivot( index= 'ID' , columns= 'Gender' , values= 'Height' )
% timeit pd. pivot_table( df, index= 'ID' , columns= 'Gender' , values= 'Height' )
% timeit pd. crosstab( index= df[ 'ID' ] , columns= df[ 'Gender' ] )
1.67 ms ± 124 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
6.73 ms ± 91.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
8.21 ms ± 213 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Pandas中提供了各種選項,下面介紹常用參數:
① aggfunc:對組內進行聚合統計,可傳入各類函數,默認爲 'mean'
# aggfunc accepts a list of aggregations, producing a column MultiIndex.
pd.pivot_table(df, index='School', columns='Gender', values='Height', aggfunc=['mean', 'sum']).head()
pd.pivot_table(df, index='School', columns='Class', values='Height', aggfunc=['mean', 'sum', 'std']).head()
mean
sum
std
Class
C_1
C_2
C_3
C_4
C_1
C_2
C_3
C_4
C_1
C_2
C_3
C_4
School
S_1
175.4
170.6
181.2
NaN
877.0
853.0
906.0
NaN
13.538833
11.523888
13.386560
NaN
S_2
164.2
180.0
173.8
173.8
821.0
900.0
869.0
869.0
7.395945
16.000000
14.342245
17.326281
② margins:彙總邊際狀態
# margins=True appends row/column totals; margins_name renames the 'All' label.
pd.pivot_table(df, index='School', columns='Gender', values='Height',
               aggfunc=['mean', 'sum'], margins=True).head()
pd.pivot_table(df, index='School', columns='Gender', values='Height',
               aggfunc=['mean', 'sum'], margins=True, margins_name='margins').head()
mean
sum
Gender
F
M
margins
F
M
margins
School
S_1
173.125000
178.714286
175.733333
1385
1251
2636
S_2
173.727273
172.000000
172.950000
1911
1548
3459
margins
173.473684
174.937500
174.142857
3296
2799
6095
③ 行、列、值都可以爲多級
pd. pivot_table( df, index= [ 'School' , 'Class' ] , columns= [ 'Gender' , 'Address' ] , values= [ 'Height' , 'Math' ] )
Height
...
Math
Gender
F
M
...
F
M
Address
street_1
street_2
street_4
street_5
street_6
street_7
street_1
street_2
street_4
street_5
...
street_4
street_5
street_6
street_7
street_1
street_2
street_4
street_5
street_6
street_7
School
Class
S_1
C_1
NaN
179.5
159.0
NaN
NaN
NaN
173.0
186.0
NaN
NaN
...
84.8
NaN
NaN
NaN
34.0
87.2
NaN
NaN
NaN
NaN
C_2
NaN
NaN
176.0
162.0
167.0
NaN
NaN
NaN
NaN
188.0
...
63.5
33.8
68.40
NaN
NaN
NaN
NaN
97.0
58.8
NaN
C_3
175.0
NaN
NaN
187.0
NaN
NaN
NaN
195.0
161.0
NaN
...
NaN
61.7
NaN
NaN
NaN
85.2
31.50
NaN
NaN
49.7
S_2
C_1
NaN
NaN
NaN
159.0
161.0
NaN
NaN
NaN
163.5
NaN
...
NaN
72.2
50.60
NaN
NaN
NaN
43.35
NaN
NaN
83.3
C_2
NaN
NaN
NaN
NaN
NaN
188.5
175.0
NaN
155.0
193.0
...
NaN
NaN
NaN
76.95
47.2
NaN
73.80
39.1
NaN
NaN
C_3
NaN
NaN
157.0
NaN
164.0
190.0
NaN
NaN
187.0
171.0
...
72.3
NaN
95.50
65.90
NaN
NaN
48.90
32.7
NaN
NaN
C_4
NaN
176.0
NaN
NaN
175.5
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
53.65
NaN
NaN
NaN
NaN
NaN
NaN
48.7
7 rows × 24 columns
3. crosstab(交叉表)
交叉表是一種特殊的透視表,典型的用途如分組統計,如現在想要統計關於街道和性別分組的頻數:
df. head( )
School
Class
ID
Gender
Address
Height
Weight
Math
Physics
0
S_1
C_1
1101
M
street_1
173
63
34.0
A+
1
S_1
C_1
1102
F
street_2
192
73
32.5
B+
2
S_1
C_1
1103
M
street_2
186
82
87.2
B+
3
S_1
C_1
1104
F
street_2
167
81
80.4
B-
4
S_1
C_1
1105
F
street_4
159
64
84.8
B+
# crosstab counts group frequencies by default.
pd.crosstab(index=df['Address'], columns=df['Gender'])
pd.crosstab(index=df['School'], columns=df['Class'])
Class
C_1
C_2
C_3
C_4
School
S_1
5
5
5
0
S_2
5
5
5
5
交叉表的功能也很強大(但目前還不支持多級分組),下面說明一些重要參數:
① values和aggfunc:分組對某些數據進行聚合操作,這兩個參數必須成對出現
pd. crosstab( index= df[ 'School' ] , columns= df[ 'Class' ] , values= df[ 'Math' ] , aggfunc= 'mean' , normalize= 'index' )
Class
C_1
C_2
C_3
C_4
School
S_1
0.333508
0.336227
0.330266
0.000000
S_2
0.245823
0.263622
0.264713
0.225842
# NOTE: no RNG seed is set, so these minima differ between runs.
pd.crosstab(index=df['Address'], columns=df['Gender'],
            values=np.random.randint(1, 20, df.shape[0]), aggfunc='min')
Gender
F
M
Address
street_1
13
12
street_2
5
6
street_4
5
2
street_5
3
14
street_6
2
6
street_7
1
4
② 除了邊際參數margins外,還引入了normalize參數,可選 'all'、'index'、'columns' 三種參數值
pd. crosstab( index= df[ 'Address' ] , columns= df[ 'Gender' ] , normalize= 'all' , margins= True )
Gender
F
M
All
Address
street_1
0.028571
0.057143
0.085714
street_2
0.114286
0.057143
0.171429
street_4
0.085714
0.142857
0.228571
street_5
0.085714
0.085714
0.171429
street_6
0.142857
0.028571
0.171429
street_7
0.085714
0.085714
0.171429
All
0.542857
0.457143
1.000000
二、其他變形方法
1. melt
melt函數可以認爲是pivot函數的逆操作,將unstacked狀態的數據,壓縮成stacked,使“寬”的DataFrame變“窄”
# Keep only the three columns needed for the melt demonstration.
df_m = df[['ID', 'Gender', 'Math']]
df_m.head()
ID
Gender
Math
0
1101
M
34.0
1
1102
F
32.5
2
1103
M
87.2
3
1104
F
80.4
4
1105
F
84.8
df. pivot( index= 'ID' , columns= 'Gender' , values= 'Math' ) . head( )
Gender
F
M
ID
1101
NaN
34.0
1102
32.5
NaN
1103
NaN
87.2
1104
80.4
NaN
1105
84.8
NaN
melt函數中的id_vars表示需要保留的列,value_vars表示需要stack的一組列
# melt is the inverse of pivot: id_vars stay, value_vars are stacked.
pivoted = df.pivot(index='ID', columns='Gender', values='Math')
result = (pivoted.reset_index()
          .melt(id_vars=['ID'], value_vars=['F', 'M'], value_name='Math')
          .dropna()
          .set_index('ID')
          .sort_index()
          .head())
result
Gender
Math
ID
1101
M
34.0
1102
F
32.5
1103
M
87.2
1104
F
80.4
1105
F
84.8
# Full round trip: pivot then melt recovers the original long-form table.
pivoted = df.pivot(index='ID', columns='Gender', values='Math')
result = (pivoted.reset_index()
          .melt(id_vars=['ID'], value_vars=['F', 'M'], value_name='Math')
          .dropna()
          .set_index('ID')
          .sort_index())
result.equals(df_m.set_index('ID'))
True
2. 壓縮與展開
(1)stack:這是最基礎的變形函數,總共只有兩個參數:level和dropna
# Build a two-level-column table to demonstrate stack/unstack.
df_s = pd.pivot_table(df, index=['Class', 'ID'], columns='Gender', values=['Height', 'Weight'])
df_s.groupby('Class').head(2)
Height
Weight
Gender
F
M
F
M
Class
ID
C_1
1101
NaN
173.0
NaN
63.0
1102
192.0
NaN
73.0
NaN
C_2
1201
NaN
188.0
NaN
68.0
1202
176.0
NaN
94.0
NaN
C_3
1301
NaN
161.0
NaN
68.0
1302
175.0
NaN
57.0
NaN
C_4
2401
192.0
NaN
62.0
NaN
2402
NaN
166.0
NaN
82.0
# Default stack moves the innermost column level (Gender) into the row index.
df_stacked = df_s.stack()
df_stacked.groupby('Class').head(2)
Height
Weight
Class
ID
Gender
C_1
1101
M
173.0
63.0
1102
F
192.0
73.0
C_2
1201
M
188.0
68.0
1202
F
176.0
94.0
C_3
1301
M
161.0
68.0
1302
F
175.0
57.0
C_4
2401
F
192.0
62.0
2402
M
166.0
82.0
stack函數可以看做將橫向的索引放到縱向,因此功能類似於melt,參數level可指定變化的列索引是哪一層(或哪幾層,需要列表)
# level=1 is the Gender level here, so this matches the default stack().
df_stacked = df_s.stack(1)
df_stacked.groupby('Class').head(2)
Height
Weight
Class
ID
Gender
C_1
1101
M
173.0
63.0
1102
F
192.0
73.0
C_2
1201
M
188.0
68.0
1202
F
176.0
94.0
C_3
1301
M
161.0
68.0
1302
F
175.0
57.0
C_4
2401
F
192.0
62.0
2402
M
166.0
82.0
# level=0 stacks the outer column level (Height/Weight) instead.
df_stacked = df_s.stack(0)
df_stacked.groupby('Class').head(2)
Gender
F
M
Class
ID
C_1
1101
Height
NaN
173.0
Weight
NaN
63.0
C_2
1201
Height
NaN
188.0
Weight
NaN
68.0
C_3
1301
Height
NaN
161.0
Weight
NaN
68.0
C_4
2401
Height
192.0
NaN
Weight
62.0
NaN
(2) unstack:stack的逆函數,功能上類似於pivot_table
df_stacked. head( )
Gender
F
M
Class
ID
C_1
1101
Height
NaN
173.0
Weight
NaN
63.0
1102
Height
192.0
NaN
Weight
73.0
NaN
1103
Height
NaN
186.0
# unstack undoes stack; swaplevel + sort_index restores the original column order.
result = df_stacked.unstack().swaplevel(1, 0, axis=1).sort_index(axis=1)
result
result.groupby('Class').head(2)
Height
Weight
Gender
F
M
F
M
Class
ID
C_1
1101
NaN
173.0
NaN
63.0
1102
192.0
NaN
73.0
NaN
C_2
1201
NaN
188.0
NaN
68.0
1202
176.0
NaN
94.0
NaN
C_3
1301
NaN
161.0
NaN
68.0
1302
175.0
NaN
57.0
NaN
C_4
2401
192.0
NaN
62.0
NaN
2402
NaN
166.0
NaN
82.0
三、啞變量與因子化
1. Dummy Variable(啞變量)
這裏主要介紹get_dummies函數,其功能主要是進行one-hot編碼:
# Subset used for the one-hot encoding demonstration.
df_d = df[['Class', 'Gender', 'Weight']]
df_d.head()
Class
Gender
Weight
0
C_1
M
63
1
C_1
F
73
2
C_1
M
82
3
C_1
F
81
4
C_1
F
64
現在希望將上面的表格前兩列轉化爲啞變量,並加入第三列Weight數值:
pd. get_dummies( df_d[ [ 'Class' , 'Gender' ] ] ) . join( df_d[ 'Weight' ] ) . head( )
Class_C_1
Class_C_2
Class_C_3
Class_C_4
Gender_F
Gender_M
Weight
0
1
0
0
0
0
1
63
1
1
0
0
0
1
0
73
2
1
0
0
0
0
1
82
3
1
0
0
0
1
0
81
4
1
0
0
0
1
0
64
2. factorize方法
該方法主要用於自然數編碼,並且缺失值會被記做-1,其中sort參數表示是否排序後賦值
# factorize assigns integer codes (missing values become -1); sort=True codes
# in sorted order of the uniques. `display` is the IPython rich-output helper.
codes, uniques = pd.factorize(['b', None, 'a', 'c', 'b'], sort=True)
display(codes)
display(uniques)
array([ 1, -1, 0, 2, 1], dtype=int64)
array(['a', 'b', 'c'], dtype=object)
四、問題與練習
1. 問題
【問題一】 上面提到了許多變形函數,如melt/crosstab/pivot/pivot_table/stack/unstack函數,請總結它們各自的使用特點。
melt &unstack 將unstacked狀態的數據,壓縮成stacked,使“寬”的DataFrame變“窄”
pivot&pivot_table&stack pivot函數可將某一列作爲新的cols
crosstab 交叉表是一種特殊的透視表,典型的用途如分組統計
【問題二】 變形函數和多級索引是什麼關係?哪些變形函數會使得索引維數變化?具體如何變化?
【問題三】 請舉出一個除了上文提過的關於啞變量方法的例子。
pd.get_dummies(df_d[['Class','Gender']]),感覺有的時候需要把一些類別特徵化成數字特徵的時候可以用
【問題四】 使用完stack後立即使用unstack一定能保證變化結果與原始表完全一致嗎?
不一定,要看具體操作的時候stack跟unstack操作時level參數
【問題五】 透視表中涉及了三個函數,請分別使用它們完成相同的目標(任務自定)並比較哪個速度最快。
%timeit df.pivot(index='ID',columns='Gender',values='Height')
%timeit pd.pivot_table(df,index='ID',columns='Gender',values='Height')
%timeit pd.crosstab(index=df['ID'],columns=df['Gender'])
1.67 ms ± 124 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
6.73 ms ± 91.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
8.21 ms ± 213 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
【問題六】 既然melt起到了stack的功能,爲什麼再設計stack函數?
stack 不用把那一列裏面具體有哪些類寫出來感覺對於類別不知道或者很多的情況下就需要用到stack
2. 練習
【練習一】 繼續使用上一章的藥物數據集:
pd. read_csv( 'data/Drugs.csv' ) . head( )
YYYY
State
COUNTY
SubstanceName
DrugReports
0
2010
VA
ACCOMACK
Propoxyphene
1
1
2010
OH
ADAMS
Morphine
9
2
2010
PA
ADAMS
Methadone
2
3
2010
VA
ALEXANDRIA CITY
Heroin
5
4
2010
PA
ALLEGHENY
Hydromorphone
5
(a) 現在請你將數據錶轉化成如下形態,每行需要顯示每種藥物在每個地區的10年至17年的變化情況,且前三列需要排序:
(圖片:picture/drug_pic.png — 原外鏈圖片已失效,請參見本地圖片)
# Load with a three-level sorted index for the pivot step.
df = pd.read_csv('data/Drugs.csv', index_col=['State', 'COUNTY', 'SubstanceName']).sort_index()
df.head()
YYYY
DrugReports
State
COUNTY
SubstanceName
KY
ADAIR
Buprenorphine
2011
3
Buprenorphine
2012
5
Buprenorphine
2013
4
Buprenorphine
2014
27
Buprenorphine
2015
5
# Spread years into columns; '-' marks missing reports; drop the columns axis name.
result = (pd.pivot_table(df, index=['State', 'COUNTY', 'SubstanceName'],
                         columns='YYYY', values='DrugReports', fill_value='-')
          .reset_index()
          .rename_axis(columns={'YYYY': ''}))
result.head()
State
COUNTY
SubstanceName
2010
2011
2012
2013
2014
2015
2016
2017
0
KY
ADAIR
Buprenorphine
-
3
5
4
27
5
7
10
1
KY
ADAIR
Codeine
-
-
1
-
-
-
-
1
2
KY
ADAIR
Fentanyl
-
-
1
-
-
-
-
-
3
KY
ADAIR
Heroin
-
-
1
2
-
1
-
2
4
KY
ADAIR
Hydrocodone
6
9
10
10
9
7
11
3
答案
# Reference answer: index on State/COUNTY only, sorted.
df = pd.read_csv('data/Drugs.csv', index_col=['State', 'COUNTY']).sort_index()
df.head()
YYYY
SubstanceName
DrugReports
State
COUNTY
KY
ADAIR
2010
Methadone
1
ADAIR
2010
Hydrocodone
6
ADAIR
2011
Oxycodone
4
ADAIR
2011
Buprenorphine
3
ADAIR
2011
Morphine
2
# Same pivot as above, built from the answer's two-level-indexed frame.
result = (pd.pivot_table(df, index=['State', 'COUNTY', 'SubstanceName'],
                         columns='YYYY', values='DrugReports', fill_value='-')
          .reset_index()
          .rename_axis(columns={'YYYY': ''}))
result.head()
State
COUNTY
SubstanceName
2010
2011
2012
2013
2014
2015
2016
2017
0
KY
ADAIR
Buprenorphine
-
3
5
4
27
5
7
10
1
KY
ADAIR
Codeine
-
-
1
-
-
-
-
1
2
KY
ADAIR
Fentanyl
-
-
1
-
-
-
-
-
3
KY
ADAIR
Heroin
-
-
1
2
-
1
-
2
4
KY
ADAIR
Hydrocodone
6
9
10
10
9
7
11
3
(b) 現在請將(a)中的結果恢復到原數據表,並通過equals函數檢驗初始表與新的結果是否一致(返回True)
# (b) Melt the pivoted table back to long form, drop the '-' placeholders,
# restore integer dtypes, then compare with the original frame.
pivoted = result.rename_axis(columns={'': 'YYYY'})
melted = (pivoted.melt(id_vars=['State', 'COUNTY', 'SubstanceName'],
                       value_vars=[2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017],
                       value_name='DrugReports')
          .set_index(['State', 'COUNTY', 'SubstanceName'])
          .dropna()
          .sort_index())
melted = melted.query('DrugReports!="-"')
melted.head()
melted['DrugReports'] = melted['DrugReports'].astype(np.int64)
melted['YYYY'] = melted['YYYY'].astype(np.int64)
type(melted['DrugReports'][0])
type(melted['YYYY'][0])
melted.equals(df)
答案
# Reference answer: melt, filter placeholders, reorder columns, cast dtypes.
result_melted = (result.melt(id_vars=result.columns[:3], value_vars=result.columns[-8:],
                             var_name='YYYY', value_name='DrugReports')
                 .query('DrugReports != "-"'))
result2 = (result_melted.sort_values(by=['State', 'COUNTY', 'YYYY', 'SubstanceName'])
           .reset_index()
           .drop(columns='index'))
# Swap SubstanceName and YYYY to match the original column order.
cols = list(result2.columns)
a, b = cols.index('SubstanceName'), cols.index('YYYY')
cols[b], cols[a] = cols[a], cols[b]
result2 = result2[cols].astype({'DrugReports': 'int', 'YYYY': 'int'})
result2.head()
State
COUNTY
YYYY
SubstanceName
DrugReports
0
KY
ADAIR
2010
Hydrocodone
6
1
KY
ADAIR
2010
Methadone
1
2
KY
ADAIR
2011
Buprenorphine
3
3
KY
ADAIR
2011
Hydrocodone
9
4
KY
ADAIR
2011
Morphine
2
# Bring the original frame into the same row order for comparison.
df_tidy = (df.reset_index()
           .sort_values(by=result2.columns[:4].tolist())
           .reset_index()
           .drop(columns='index'))
df_tidy.head()
State
COUNTY
YYYY
SubstanceName
DrugReports
0
KY
ADAIR
2010
Hydrocodone
6
1
KY
ADAIR
2010
Methadone
1
2
KY
ADAIR
2011
Buprenorphine
3
3
KY
ADAIR
2011
Hydrocodone
9
4
KY
ADAIR
2011
Morphine
2
df_tidy. equals( result2)
False
【練習二】 現有一份關於某地區地震情況的數據集,請解決如下問題:
pd. read_csv( 'data/Earthquake.csv' ) . head( )
日期
時間
維度
經度
方向
距離
深度
烈度
0
2003.05.20
12:17:44 AM
39.04
40.38
west
0.1
10.0
0.0
1
2007.08.01
12:03:08 AM
40.79
30.09
west
0.1
5.2
4.0
2
1978.05.07
12:41:37 AM
38.58
27.61
south_west
0.1
0.0
0.0
3
1997.03.22
12:31:45 AM
39.47
36.44
south_west
0.1
10.0
0.0
4
2000.04.02
12:57:38 AM
40.80
30.24
south_west
0.1
7.0
0.0
(a) 現在請你將數據錶轉化成如下形態,將方向列展開,並將距離、深度和烈度三個屬性壓縮:
(圖片:picture/earthquake_pic.png — 原外鏈圖片已失效,請參見本地圖片)
方案一
答案
# Load, sort rows by the first three columns, sort columns, and renumber.
df = pd.read_csv('data/Earthquake.csv')
df = (df.sort_values(by=df.columns.tolist()[:3])
      .sort_index(axis=1)
      .reset_index()
      .drop(columns='index'))
df.head()
方向
日期
時間
深度
烈度
經度
維度
距離
0
south_east
1912.08.09
12:29:00 AM
16.0
6.7
27.2
40.6
4.3
1
south_west
1912.08.10
12:23:00 AM
15.0
6.0
27.1
40.6
2.0
2
south_west
1912.08.10
12:30:00 AM
15.0
5.2
27.1
40.6
2.0
3
south_east
1912.08.11
12:19:04 AM
30.0
4.9
27.2
40.6
4.3
4
south_west
1912.08.11
12:20:00 AM
15.0
4.5
27.1
40.6
2.0
# Spread 方向 (direction) into columns, then stack the measurement level
# (烈度/深度/距離) into rows, naming that new index level 地震參數.
result = (pd.pivot_table(df, index=['日期', '時間', '維度', '經度'],
                         columns='方向',
                         values=['烈度', '深度', '距離'],
                         fill_value='-')
          .stack(level=0)
          .rename_axis(index={None: '地震參數'}))
result.head(6)
方向
east
north
north_east
north_west
south
south_east
south_west
west
日期
時間
維度
經度
地震參數
1912.08.09
12:29:00 AM
40.6
27.2
深度
-
-
-
-
-
16
-
-
烈度
-
-
-
-
-
6.7
-
-
距離
-
-
-
-
-
4.3
-
-
1912.08.10
12:23:00 AM
40.6
27.1
深度
-
-
-
-
-
-
15
-
烈度
-
-
-
-
-
-
6
-
距離
-
-
-
-
-
-
2
-
(b) 現在請將(a)中的結果恢復到原數據表,並通過equals函數檢驗初始表與新的結果是否一致(返回True)
# (b) Recover the original table from the pivoted `result` and compare.
result = result.rename_axis(index={'地震參數': None})
# Hoist the expensive reshape: the original computed result.unstack().stack(0)
# three times (and built the whole df_result twice); do it once.
wide = result.unstack().stack(0)
# Drop rows that are entirely the '-' placeholder introduced by fill_value.
# Use the keyword form `axis=1`: the positional DataFrame.any(1) was removed
# in pandas 2.0.
df_result = wide[(wide != '-').any(axis=1)].reset_index()
df_result.columns.name = None
# Restore alphabetical column order and numeric dtypes so equals() can succeed.
df_result = df_result.sort_index(axis=1).astype({'深度': 'float64', '烈度': 'float64', '距離': 'float64'})
df_result.head()
方向
日期
時間
深度
烈度
經度
維度
距離
0
south_east
1912.08.09
12:29:00 AM
16.0
6.7
27.2
40.6
4.3
1
south_west
1912.08.10
12:23:00 AM
15.0
6.0
27.1
40.6
2.0
2
south_west
1912.08.10
12:30:00 AM
15.0
5.2
27.1
40.6
2.0
3
south_east
1912.08.11
12:19:04 AM
30.0
4.9
27.2
40.6
4.3
4
south_west
1912.08.11
12:20:00 AM
15.0
4.5
27.1
40.6
2.0
df_result. astype( { '深度' : 'float64' , '烈度' : 'float64' , '距離' : 'float64' } , copy= False ) . dtypes
方向 object
日期 object
時間 object
深度 float64
烈度 float64
經度 float64
維度 float64
距離 float64
dtype: object
df. equals( df_result)
True