# 兩張表都有公共行 pagePath,由 pagePath 的值,將兩張表合併。
# 默認是內關聯(inner join);其他關聯方式可透過 pd.merge 的 how 參數指定。
# 最後將合併結果保存為 Excel 文件。
import pandas as pd
from pandas import DataFrame
# Input/output locations. Raw strings are used so that every backslash is
# taken literally; the original literals relied on the invalid escape
# sequence "\j", which newer Python versions warn about.
CATE_NAMES_PATH = r'C:\Users\SHEIN\Desktop\join\cate_names.xlsx'
GA_TRAFFICS_PATH = r'C:\Users\SHEIN\Desktop\join\ga_traffics.xlsx'
TOTAL_PATH = r'C:\Users\SHEIN\Desktop\join\total.xlsx'


def drop_duplicate_pages(frame):
    """Return *frame* keeping only the first row for each ``pagePath`` value."""
    return frame.drop_duplicates(['pagePath'])


def merge_on_page_path(cate_names, ga_traffics, how='inner'):
    """Join two tables on their shared ``pagePath`` column.

    Args:
        cate_names: Left-hand DataFrame; must contain a ``pagePath`` column.
        ga_traffics: Right-hand DataFrame; must contain a ``pagePath`` column.
        how: Join type forwarded to ``pd.merge``. Defaults to ``'inner'``,
            which is what the script has always used.

    Returns:
        The merged DataFrame.
    """
    return pd.merge(cate_names, ga_traffics, on='pagePath', how=how)


def main():
    """Read both workbooks, merge them on ``pagePath`` and save the result."""
    # pagePath is forced to str in both reads so the join keys have the same
    # dtype on each side.
    # NOTE(review): the sheet names differ in case ('sheet1' vs 'Sheet1');
    # kept exactly as in the original -- confirm against the workbooks.
    cate_names = pd.read_excel(
        CATE_NAMES_PATH, sheet_name='sheet1', dtype={'pagePath': str})
    print(type(cate_names))

    ga_traffics = pd.read_excel(
        GA_TRAFFICS_PATH, sheet_name='Sheet1', dtype={'pagePath': str})
    # Only the traffic table is de-duplicated before the join.
    ga_traffics = drop_duplicate_pages(ga_traffics)
    print(ga_traffics)

    # Announce the merge before it runs (previously printed afterwards).
    print('開始合併')
    total_excel = merge_on_page_path(cate_names, ga_traffics)

    # index=False: do not write the DataFrame index as a column.
    total_excel.to_excel(TOTAL_PATH, index=False)


if __name__ == '__main__':
    main()