約定:
import pandas as pd
import numpy as np
from numpy import nan as NaN
填充缺失數據
fillna() 是處理缺失數據最主要的方法:可以用單一數值、按列指定的字典,或以前後相鄰的有效值來填充 NaN。
df1=pd.DataFrame([[1,2,3],[NaN,NaN,2],[NaN,NaN,NaN],[8,8,NaN]])
df1
代碼結果:
|   | 0 | 1 | 2 |
|---|---|---|---|
| 0 | 1.0 | 2.0 | 3.0 |
| 1 | NaN | NaN | 2.0 |
| 2 | NaN | NaN | NaN |
| 3 | 8.0 | 8.0 | NaN |
df1.fillna(100)
代碼結果:
|   | 0 | 1 | 2 |
|---|---|---|---|
| 0 | 1.0 | 2.0 | 3.0 |
| 1 | 100.0 | 100.0 | 2.0 |
| 2 | 100.0 | 100.0 | 100.0 |
| 3 | 8.0 | 8.0 | 100.0 |
df1.fillna({0:10,1:20,2:30})
代碼結果(傳入字典時,鍵為列標籤,值為該列的填充值):
|   | 0 | 1 | 2 |
|---|---|---|---|
| 0 | 1.0 | 2.0 | 3.0 |
| 1 | 10.0 | 20.0 | 2.0 |
| 2 | 10.0 | 20.0 | 30.0 |
| 3 | 8.0 | 8.0 | 30.0 |
df1.fillna(0,inplace=True)
df1
代碼結果(inplace=True 會直接修改 df1 本身,而不是返回新的對象):
|   | 0 | 1 | 2 |
|---|---|---|---|
| 0 | 1.0 | 2.0 | 3.0 |
| 1 | 0.0 | 0.0 | 2.0 |
| 2 | 0.0 | 0.0 | 0.0 |
| 3 | 8.0 | 8.0 | 0.0 |
df2=pd.DataFrame(np.random.randint(0,10,(5,5)))
df2.iloc[1:4,3]=NaN;df2.iloc[2:4,4]=NaN
df2
代碼結果(數據由 np.random.randint 隨機生成,每次運行的數值會不同):
|   | 0 | 1 | 2 | 3 | 4 |
|---|---|---|---|---|---|
| 0 | 6 | 6 | 2 | 4.0 | 1.0 |
| 1 | 4 | 7 | 0 | NaN | 5.0 |
| 2 | 6 | 5 | 5 | NaN | NaN |
| 3 | 1 | 9 | 9 | NaN | NaN |
| 4 | 4 | 8 | 1 | 5.0 | 9.0 |
df2.fillna(method='ffill')
代碼結果(method='ffill' 會在每一列中用前一個有效值向下填充;注意 pandas 2.1 起 fillna 的 method 參數已棄用,新版本請改用 df2.ffill()):
|   | 0 | 1 | 2 | 3 | 4 |
|---|---|---|---|---|---|
| 0 | 6 | 6 | 2 | 4.0 | 1.0 |
| 1 | 4 | 7 | 0 | 4.0 | 5.0 |
| 2 | 6 | 5 | 5 | 4.0 | 5.0 |
| 3 | 1 | 9 | 9 | 4.0 | 5.0 |
| 4 | 4 | 8 | 1 | 5.0 | 9.0 |
df2.fillna(method='bfill',limit=2)
代碼結果(method='bfill' 用後一個有效值向上填充;limit=2 表示每段連續的 NaN 最多填充 2 個,因此第 3 列第 1 行仍為 NaN):
|   | 0 | 1 | 2 | 3 | 4 |
|---|---|---|---|---|---|
| 0 | 6 | 6 | 2 | 4.0 | 1.0 |
| 1 | 4 | 7 | 0 | NaN | 5.0 |
| 2 | 6 | 5 | 5 | 5.0 | 9.0 |
| 3 | 1 | 9 | 9 | 5.0 | 9.0 |
| 4 | 4 | 8 | 1 | 5.0 | 9.0 |
df2.fillna(method="ffill",limit=1,axis=1)
代碼結果(axis=1 表示在每一行中由左向右填充;limit=1 表示每段連續的 NaN 最多填充 1 個,因此第 2、3 行的第 4 列仍為 NaN):
|   | 0 | 1 | 2 | 3 | 4 |
|---|---|---|---|---|---|
| 0 | 6.0 | 6.0 | 2.0 | 4.0 | 1.0 |
| 1 | 4.0 | 7.0 | 0.0 | 0.0 | 5.0 |
| 2 | 6.0 | 5.0 | 5.0 | 5.0 | NaN |
| 3 | 1.0 | 9.0 | 9.0 | 9.0 | NaN |
| 4 | 4.0 | 8.0 | 1.0 | 5.0 | 9.0 |
謝謝大家的瀏覽,
希望我的努力能幫助到您,
共勉!