import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
# Build sample data: a 500 x 505 frame of normally distributed values
# (mean 0, standard deviation 100), one row per stock and one column per day.
temp = pd.DataFrame(np.random.normal(0, 100, (500, 505)))
stock_list = ["股票" + str(i) for i in range(temp.shape[0])]
date = ["第" + str(i) + "天" for i in range(temp.shape[1])]
temp.index = stock_list
temp.columns = date

# Take a single column of data.
p_change = temp["第1天"]

# Group the values with pd.cut using hand-picked bin edges.
bins = [-500, -100, -7, -5, -3, 0, 3, 5, 7, 100, 500]
a = pd.cut(p_change, bins)

# `prefix` is the text placed in front of each generated column name.
dummies = pd.get_dummies(a, prefix="rise")

# Combine the original frame with the dummy columns using concat ...
print(pd.concat([temp, dummies], axis=1))
# ... and do the same using join.
print(temp.join(dummies))
二、merge 合併數據
1. 內連接
import pandas as pd
# Sample data for the inner-join example.
left = {
    "name": ["xiaoming", "xiaofang", "xiaoli", "xiaofu"],
    "sore": [120, 132, 124, 110],
    "tel": [10086, 10000, 10010, 10001],
    "agent": [1, 0, 0, 1],
}
right = [
    {"name": "daming", "age": 22, "tel": 10000},
    {"name": "dafang", "age": 32, "tel": 120120},
    {"name": "xiaoli", "age": 24, "tel": 10010},
    {"name": "xiaofu", "age": 26, "tel": 10010},
]
left = pd.DataFrame(left)
right = pd.DataFrame(right)

# Inner join (the default `how`): `on` lists key columns present in both
# frames; only rows whose values agree on every listed key are kept.
result = pd.merge(left, right, on=["name", "tel"])
print(result)
2. 左連接
import pandas as pd
# Sample data for the left-join example.
left = {
    "name": ["xiaoming", "xiaofang", "xiaoli", "xiaofu"],
    "sore": [120, 132, 124, 110],
    "tel": [10086, 10000, 10010, 10001],
    "agent": [1, 0, 0, 1],
}
right = [
    {"name": "daming", "age": 22, "tel": 10000},
    {"name": "dafang", "age": 32, "tel": 120120},
    {"name": "xiaoli", "age": 24, "tel": 10010},
    {"name": "xiaofu", "age": 26, "tel": 10010},
]
left = pd.DataFrame(left)
right = pd.DataFrame(right)

# Left join: every row of `left` is kept and the columns found only in
# `right` are appended. Where all `on` keys match, the right-hand values are
# shown; otherwise those columns are NaN.
result = pd.merge(left, right, how="left", on=["name", "tel"])
print(result)
3. 右連接
import pandas as pd
# Sample data for the right-join example.
left = {
    "name": ["xiaoming", "xiaofang", "xiaoli", "xiaofu"],
    "sore": [120, 132, 124, 110],
    "tel": [10086, 10000, 10010, 10001],
    "agent": [1, 0, 0, 1],
}
right = [
    {"name": "daming", "age": 22, "tel": 10000},
    {"name": "dafang", "age": 32, "tel": 120120},
    {"name": "xiaoli", "age": 24, "tel": 10010},
    {"name": "xiaofu", "age": 26, "tel": 10010},
]
left = pd.DataFrame(left)
right = pd.DataFrame(right)

# Right join: every row of `right` is kept and the columns found only in
# `left` are appended. Where all `on` keys match, the left-hand values are
# shown; otherwise those columns are NaN.
result = pd.merge(left, right, how="right", on=["name", "tel"])
print(result)
4. 外連接
import pandas as pd
# Sample data for the outer-join example; here `right` also carries an
# "agent" column, so that name exists on both sides without being a join key.
left = {
    "name": ["xiaoming", "xiaofang", "xiaoli", "xiaofu"],
    "sore": [120, 132, 124, 110],
    "tel": [10086, 10000, 10010, 10001],
    "agent": [1, 0, 0, 1],
}
right = [
    {"name": "daming", "age": 22, "tel": 10000, "agent": 1},
    {"name": "dafang", "age": 32, "tel": 120120, "agent": 0},
    {"name": "xiaoli", "age": 24, "tel": 10010, "agent": 1},
    {"name": "xiaofu", "age": 26, "tel": 10010, "agent": 1},
]
left = pd.DataFrame(left)
right = pd.DataFrame(right)

# Outer join: rows from both frames are kept. Rows whose `on` key values
# match are combined into a single row. A non-key column present in both
# frames ("agent") is split into suffixed columns (agent_x from left,
# agent_y from right) so both values are retained.
result = pd.merge(left, right, how="outer", on=["name", "tel"])
print(result)