# Build a single labelled record; the Series name ('new_row') becomes the
# row label once the record is added to the frame.  Passing a dict
# ({'Gender': 'F', 'Height': 188}) instead of values + index is equivalent.
s = pd.Series(['F', 188], index=['Gender', 'Height'], name='new_row')
s
# DataFrame.append was removed in pandas 2.0.  Turning the Series into a
# one-row frame and concatenating reproduces the old behaviour;
# infer_objects() restores per-column dtypes after the transpose.
# df_append is defined earlier in the file.
pd.concat([df_append, s.to_frame().T.infer_objects()])
0 M
1 F
10 NaN
11 NaN
Name: Gender, dtype: object

0 NaN
1 NaN
10 M
11 F
Name: Gender, dtype: object
0 173.0
1 192.0
10 NaN
11 NaN
Name: Height, dtype: float64

0 NaN
1 NaN
10 161.0
11 175.0
Name: Height, dtype: float64
# Small fixture frames: df1 and df2 share columns A/B with disjoint row
# labels; df3 shares only column A and uses row labels 1 and 3.
df1 = pd.DataFrame(
    {
        'A': ['A0', 'A1'],
        'B': ['B0', 'B1'],
    },
    index=[0, 1],
)
df2 = pd.DataFrame(
    {
        'A': ['A2', 'A3'],
        'B': ['B2', 'B3'],
    },
    index=[2, 3],
)
df3 = pd.DataFrame(
    {
        'A': ['A1', 'A3'],
        'D': ['D1', 'D3'],
        'E': ['E1', 'E3'],
    },
    index=[1, 3],
)
# Fixtures for merging on two key columns.  right2 is right without the
# 'D' column.
left = pd.DataFrame(
    {
        'key1': ['K0', 'K0', 'K1', 'K2'],
        'key2': ['K0', 'K1', 'K0', 'K1'],
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
    }
)
right = pd.DataFrame(
    {
        'key1': ['K0', 'K1', 'K1', 'K2'],
        'key2': ['K0', 'K0', 'K0', 'K0'],
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    }
)
right2 = pd.DataFrame(
    {
        'key1': ['K0', 'K1', 'K1', 'K2'],
        'key2': ['K0', 'K0', 'K0', 'K0'],
        'C': ['C0', 'C1', 'C2', 'C3'],
    }
)
# validate='one_to_one' demo.
# With this first `left`, column B holds the duplicate key 2, so an outer
# merge on 'B' with validate='one_to_one' would raise an error.
left = pd.DataFrame({'A': [1, 2], 'B': [2, 2]})
right = pd.DataFrame({'A': [4, 5, 6], 'B': [2, 3, 4]})

# Once the keys in `left` are unique, the same validated merge is accepted.
left = pd.DataFrame({'A': [1, 2], 'B': [2, 1]})
pd.merge(
    left,
    right,
    on='B',
    how='outer',
    validate='one_to_one',
)
# Index-aligned join: both frames are labelled by K-keys; left.join(right)
# keeps every row of `left` and fills C/D from matching labels in `right`.
left = pd.DataFrame(
    {'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
    index=['K0', 'K1', 'K2'],
)
right = pd.DataFrame(
    {'C': ['C0', 'C2', 'C3'], 'D': ['D0', 'D2', 'D3']},
    index=['K0', 'K2', 'K3'],
)
display(left)
display(right)
display(left.join(right))

# Give the (previously unnamed) index the name 'indx' so that pd.merge can
# address it through `on=`.
left = left.rename_axis('indx')
right = right.rename_axis('indx')
left.merge(right, how='left', on='indx')
A
B
K0
A0
B0
K1
A1
B1
K2
A2
B2
C
D
K0
C0
D0
K2
C2
D2
K3
C3
D3
A
B
C
D
K0
A0
B0
C0
D0
K1
A1
B1
NaN
NaN
K2
A2
B2
C2
D2
A
B
C
D
indx
K0
A0
B0
C0
D0
K1
A1
B1
NaN
NaN
K2
A2
B2
C2
D2
對於 many_to_one 模式下的合併，往往 join 更爲方便。
同樣可以指定 key：
# Join a key column of `left` against the index of `right`: each row of
# `left` looks up its 'key' value among the row labels of `right`.
left = pd.DataFrame(
    {
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
        'key': ['K0', 'K1', 'K0', 'K1'],
    }
)
right = pd.DataFrame(
    {
        'C': ['C0', 'C1'],
        'D': ['D0', 'D1'],
    },
    index=['K0', 'K1'],
)
left.join(right, on='key')
A
B
key
C
D
0
A0
B0
K0
C0
D0
1
A1
B1
K1
C1
D1
2
A2
B2
K0
C0
D0
3
A3
B3
K1
C1
D1
多層 key：
# Multi-key join: `left` carries key1/key2 as ordinary columns, while
# `right` is labelled by a two-level MultiIndex with the same names.
left = pd.DataFrame(
    {
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
        'key1': ['K0', 'K0', 'K1', 'K2'],
        'key2': ['K0', 'K1', 'K0', 'K1'],
    }
)
index = pd.MultiIndex.from_tuples(
    [('K0', 'K0'), ('K1', 'K0'), ('K2', 'K0'), ('K2', 'K1')],
    names=['key1', 'key2'],
)
right = pd.DataFrame(
    {
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    },
    index=index,
)
display(left)
display(right)
display(left.join(right, on=['key1', 'key2']))
# The same left join expressed through merge; the `on` names refer to
# columns in `left` and to index levels in `right`.
left.merge(right, on=['key1', 'key2'], how='left')
# Repeat of the multi-key example, showing only the joined result.
left = pd.DataFrame(
    {
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
        'key1': ['K0', 'K0', 'K1', 'K2'],
        'key2': ['K0', 'K1', 'K0', 'K1'],
    }
)
# from_arrays with one list per level builds the same index as from_tuples
# with the (key1, key2) pairs K0/K0, K1/K0, K2/K0, K2/K1.
index = pd.MultiIndex.from_arrays(
    [
        ['K0', 'K1', 'K2', 'K2'],
        ['K0', 'K0', 'K0', 'K1'],
    ],
    names=['key1', 'key2'],
)
right = pd.DataFrame(
    {
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    },
    index=index,
)
display(left.join(right, on=['key1', 'key2']))
left.merge(right, on=['key1', 'key2'], how='left')
# Employee exercise: reconcile the two employee tables into one frame
# indexed by (Company, Number).
#
# Relies on two names defined earlier in the file:
#   L    - collection of employee names that get special handling
#          (presumably the names present in both tables - TODO confirm)
#   df_b - frame of already-resolved rows indexed by 'Name' (TODO confirm)
df1 = pd.read_csv('data/Employee1.csv')
df2 = pd.read_csv('data/Employee2.csv')

# Tag every row: 'Y_1' / 'Y_2' when the name is listed in L (the suffix
# records which table the row came from), otherwise 'N'.
df1['重複'] = ['Y_1' if name in L else 'N' for name in df1['Name']]
df2['重複'] = ['Y_2' if name in L else 'N' for name in df2['Name']]
df1 = df1.set_index(['Name', '重複'])
df2 = df2.set_index(['Name', '重複'])
df_c = pd.concat([df1, df2])

result = pd.DataFrame({'Company': [], 'Name': [], 'Age': [], 'Height': [], 'Weight': [], 'Salary': []})
group = df_c.groupby(['Company', '重複'])
for i in L:
    # The company letter is the upper-cased first character of the name.
    company = i[0].upper()
    # Last four fields of this employee's row in each table
    # (presumably Age, Height, Weight, Salary - TODO confirm CSV schema).
    first = group.get_group((company, 'Y_1')).reset_index(level=1).loc[i, :][-4:]
    second = group.get_group((company, 'Y_2')).reset_index(level=1).loc[i, :][-4:]
    # Column means over the company's 'N' rows.  numeric_only=True is
    # required on pandas >= 2.0, where string columns are no longer dropped
    # silently; it assumes the numeric columns are exactly the four fields
    # sliced above.
    mean = group.get_group((company, 'N')).reset_index(level=1).mean(numeric_only=True)
    final = [company, i]
    for j in range(4):
        # Keep whichever table's value lies closer to the company mean;
        # .iloc makes the positional access explicit.
        if abs(first.iloc[j] - mean.iloc[j]) < abs(second.iloc[j] - mean.iloc[j]):
            final.append(first.iloc[j])
        else:
            final.append(second.iloc[j])
    result = pd.concat([result, pd.DataFrame({result.columns.tolist()[k]: [final[k]] for k in range(6)})])

result = pd.concat([result.set_index('Name'), df_b])

# Add an all-NaN placeholder row for every expected id a1..e16 that is
# still missing from the result.
for i in list('abcde'):
    for j in range(1, 17):
        item = i + str(j)
        if item not in result.index:
            result = pd.concat([result, pd.DataFrame({'Company': [i.upper()], 'Name': [item], 'Age': [np.nan], 'Height': [np.nan], 'Weight': [np.nan], 'Salary': [np.nan]}).set_index('Name')])

# The numeric part of the name (everything after the first character)
# becomes the second index level.
result['Number'] = [int(i[1:]) for i in result.index]
result.reset_index().drop(columns='Name').set_index(['Company', 'Number']).sort_index()
# Course exercise: load both course tables, then split each one by whether
# 課程類別 is one of the three categories listed in the queries below.
df1, df2 = (
    pd.read_csv(path)
    for path in ('data/Course1.csv', 'data/Course2.csv')
)

# df_a11 / df_a12: listed / not-listed categories from the first table.
df_a11 = df1.query('課程類別 in ["學科基礎課","專業必修課","專業選修課"]')
df_a12 = df1.query('課程類別 not in ["學科基礎課","專業必修課","專業選修課"]')
# df_a21 / df_a22: the same split for the second table.
df_a21 = df2.query('課程類別 in ["學科基礎課","專業必修課","專業選修課"]')
df_a22 = df2.query('課程類別 not in ["學科基礎課","專業必修課","專業選修課"]')
df_a11.head()
(b) 將兩張專業課的分數表和兩張非專業課的分數表分別合併。
# Stack the two "listed category" tables and the two remaining tables.
special = pd.concat(
    [df_a11, df_a21]
)
common = pd.concat(
    [df_a12, df_a22]
)
# Sanity check: should select no rows if the earlier split was correct.
special.query('課程類別 not in ["學科基礎課","專業必修課","專業選修課"]')

# Alternative order: concatenate the raw tables first, then split.
df = pd.concat(
    [df1, df2]
)
special2 = df.query('課程類別 in ["學科基礎課","專業必修課","專業選修課"]')
common2 = df.query('課程類別 not in ["學科基礎課","專業必修課","專業選修課"]')
# Compare the results of the two orders.
special.equals(special2), common.equals(common2)