數據挖掘工具pandas(十一)數據合併

一,concat / join合併數據

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

# Build a synthetic table: one row per stock, one column per day, filled with
# draws from a normal distribution (mean 0, standard deviation 100).
n_stocks, n_days = 500, 505
stock_list = [f"股票{i}" for i in range(n_stocks)]
date = [f"第{i}天" for i in range(n_days)]
temp = pd.DataFrame(
    np.random.normal(0, 100, (n_stocks, n_days)),
    index=stock_list,
    columns=date,
)

# Pull out a single column as a Series (still indexed by stock label).
p_change = temp["第1天"]

# Bucket the values with pd.cut using hand-picked bin edges rather than
# equal-width bins.
bins = [-500, -100, -7, -5, -3, 0, 3, 5, 7, 100, 500]
a = pd.cut(p_change, bins)

# One indicator column per interval; `prefix` is the text placed in front of
# each generated column name.
dummies = pd.get_dummies(a, prefix="rise")

# Merge with concat: axis=1 places the indicator columns beside the originals.
combined = pd.concat([temp, dummies], axis=1)
print(combined)

# Merge with join: the same two frames combined through DataFrame.join.
joined = temp.join(dummies)
print(joined)

二,merge合併數據

1,內連接
import pandas as pd
# Left table, given column by column.
# NOTE(review): the key "sore" looks like a typo for "score"; it is kept as-is
# because it is the column name that appears in the printed output.
left = {
    "name" : ["xiaoming","xiaofang","xiaoli","xiaofu"],
    "sore" : [120,132,124,110],
    "tel" : [10086,10000,10010,10001],
    "agent":[1,0,0,1]
}
# Right table, given row by row.
right = [
    {"name" : "daming", "age": 22,"tel":10000 },
    {"name" : "dafang", "age": 32,"tel":120120 },
    {"name" : "xiaoli", "age": 24,"tel":10010 },
    {"name" : "xiaofu", "age": 26,"tel":10010 }
]
left = pd.DataFrame(left)
right = pd.DataFrame(right)

# Inner merge: `on` lists the key columns shared by both tables; only rows
# whose values match in every listed key column are kept.
# The merge call was previously commented out, which left `result` undefined
# and made the print below raise NameError.
result = pd.merge(left, right, how="inner", on=["name", "tel"])
print(result)

在這裏插入圖片描述

2,左連接
import pandas as pd
# Left table.
# NOTE(review): the key "sore" looks like a typo for "score"; kept unchanged.
left = pd.DataFrame(
    {
        "name": ["xiaoming", "xiaofang", "xiaoli", "xiaofu"],
        "sore": [120, 132, 124, 110],
        "tel": [10086, 10000, 10010, 10001],
        "agent": [1, 0, 0, 1],
    }
)
# Right table; same rows as before, written column by column.
right = pd.DataFrame(
    {
        "name": ["daming", "dafang", "xiaoli", "xiaofu"],
        "age": [22, 32, 24, 26],
        "tel": [10000, 120120, 10010, 10010],
    }
)

# Left merge: `on` lists the key columns shared by both tables. Every row of
# the left table is kept, and the columns found only in the right table are
# appended. Where all `on` values match, the right-only columns carry the
# right table's values; otherwise they are NaN.
result = left.merge(right, how="left", on=["name", "tel"])

print(result)

在這裏插入圖片描述

3,右連接
import pandas as pd
# Left table.
# NOTE(review): the key "sore" looks like a typo for "score"; kept unchanged.
left = pd.DataFrame(
    {
        "name": ["xiaoming", "xiaofang", "xiaoli", "xiaofu"],
        "sore": [120, 132, 124, 110],
        "tel": [10086, 10000, 10010, 10001],
        "agent": [1, 0, 0, 1],
    }
)
# Right table; same rows as before, written column by column.
right = pd.DataFrame(
    {
        "name": ["daming", "dafang", "xiaoli", "xiaofu"],
        "age": [22, 32, 24, 26],
        "tel": [10000, 120120, 10010, 10010],
    }
)

# Right merge: `on` lists the key columns shared by both tables. Every row of
# the right table is kept, and the columns found only in the left table are
# added. Where all `on` values match, the left-only columns carry the left
# table's values; otherwise they are NaN.
result = left.merge(right, how="right", on=["name", "tel"])

print(result)

在這裏插入圖片描述

4,外連接
import pandas as pd
# Left table.
# NOTE(review): the key "sore" looks like a typo for "score"; kept unchanged.
left = pd.DataFrame(
    {
        "name": ["xiaoming", "xiaofang", "xiaoli", "xiaofu"],
        "sore": [120, 132, 124, 110],
        "tel": [10086, 10000, 10010, 10001],
        "agent": [1, 0, 0, 1],
    }
)
# Right table; unlike the earlier examples it also has an "agent" column.
right = pd.DataFrame(
    {
        "name": ["daming", "dafang", "xiaoli", "xiaofu"],
        "age": [22, 32, 24, 26],
        "tel": [10000, 120120, 10010, 10010],
        "agent": [1, 0, 1, 1],
    }
)

# Outer merge: rows from both tables are kept. Rows whose `on` values match
# are combined into a single row. A column that exists in both tables but is
# not listed in `on` ("agent" here) is emitted as two separate columns,
# "agent_x" and "agent_y".
result = left.merge(right, how="outer", on=["name", "tel"])

print(result)

在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章