量化投資 — 配對交易策略 (Pair Trading)

配對交易策略 Pair Trading

0. 引庫

import pandas as pd
import numpy as np
import tushare as ts
import seaborn
from matplotlib import pyplot as plt
# The 'seaborn' style was renamed to 'seaborn-v0_8' in matplotlib 3.6 and the
# old name was later removed; fall back to the old name on older releases.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
%matplotlib inline
# Load the full price history of both stocks and index the rows by trade date.
data = pd.read_csv('pair-trade-data.csv').set_index('date')
data.head()
000568 000858
date
2010/1/4 27.488118 26.117536
2010/1/5 27.335123 26.391583
2010/1/6 26.941707 25.694008
2010/1/7 26.388011 24.913389
2010/1/8 26.825140 24.863562
# Plot every column of `data` on one chart (trailing ';' hides the cell's text output).
data.plot(figsize=(8, 6));

在這裏插入圖片描述

2. 策略開發思路

# Naive assumption: the raw price difference is mean-reverting
# (not a statistically justified idea).
data['priceDelta'] = data['000568'] - data['000858']
data.head()
000568 000858 priceDelta
date
2010/1/4 27.488118 26.117536 1.370582
2010/1/5 27.335123 26.391583 0.943540
2010/1/6 26.941707 25.694008 1.247699
2010/1/7 26.388011 24.913389 1.474622
2010/1/8 26.825140 24.863562 1.961578
# Plot the spread and draw a horizontal line at its sample mean.
data['priceDelta'].plot(figsize=(8, 6));
plt.ylabel('Spread')
plt.axhline(data['priceDelta'].mean());

在這裏插入圖片描述

# Standardise the spread into a z-score: subtract the mean and divide by the
# population standard deviation (ddof=0, which is what np.std uses).
data['zscore'] = (data['priceDelta'] - data['priceDelta'].mean()) / data['priceDelta'].std(ddof=0)
data.head()
000568 000858 priceDelta zscore
date
2010/1/4 27.488118 26.117536 1.370582 0.569895
2010/1/5 27.335123 26.391583 0.943540 0.500520
2010/1/6 26.941707 25.694008 1.247699 0.549932
2010/1/7 26.388011 24.913389 1.474622 0.586796
2010/1/8 26.825140 24.863562 1.961578 0.665903
# Number of rows whose z-score is above +1.5.
len(data[data['zscore'] > 1.5])
17
# 'position_1' is the open/close signal for 000568:
#   |z-score| below 0.5 -> 0   (close the position)
#   z-score below -1.5  -> +1  (long 000568)
#   z-score above +1.5  -> -1  (short 000568)
#   anything else       -> NaN (no new signal on that row)
data['position_1'] = np.select(
    [abs(data['zscore']) < 0.5, data['zscore'] < -1.5, data['zscore'] > 1.5],
    [0, 1, -1],
    default=np.nan,
)
data.head()
000568 000858 priceDelta zscore position_1
date
2010/1/4 27.488118 26.117536 1.370582 0.569895 NaN
2010/1/5 27.335123 26.391583 0.943540 0.500520 NaN
2010/1/6 26.941707 25.694008 1.247699 0.549932 NaN
2010/1/7 26.388011 24.913389 1.474622 0.586796 NaN
2010/1/8 26.825140 24.863562 1.961578 0.665903 NaN

產生交易信號

# Forward-fill so each signal is carried until the next one appears; rows
# before the first signal are set to 0 (no position).
data['position_1'] = data['position_1'].ffill().fillna(0)
data['position_1'].plot(ylim=[-1.1, 1.1], figsize=(10, 6));

在這裏插入圖片描述

# 'position_2' is the open/close signal for 000858 (opposite sign to 000568's).
data['position_2'] = -np.sign(data['position_1'])
data['position_2'].plot(ylim=[-1.1, 1.1], figsize=(10, 6));

在這裏插入圖片描述

3. 計算策略年化收益並可視化

# Log return of each stock versus the previous row. The first row has no
# previous price, so its NaN is replaced with 0.
data['returns_1'] = (np.log(data['000568'] / data['000568'].shift(1))).fillna(0)
data['returns_2'] = (np.log(data['000858'] / data['000858'].shift(1))).fillna(0)
data.head(10)
000568 000858 priceDelta zscore position_1 position_2 returns_1 returns_2
date
2010/1/4 27.488118 26.117536 1.370582 0.569895 0.0 -0.0 0.000000 0.000000
2010/1/5 27.335123 26.391583 0.943540 0.500520 0.0 -0.0 -0.005581 0.010438
2010/1/6 26.941707 25.694008 1.247699 0.549932 0.0 -0.0 -0.014497 -0.026787
2010/1/7 26.388011 24.913389 1.474622 0.586796 0.0 -0.0 -0.020766 -0.030852
2010/1/8 26.825140 24.863562 1.961578 0.665903 0.0 -0.0 0.016430 -0.002002
2010/1/11 25.936311 24.631037 1.305274 0.559285 0.0 -0.0 -0.033696 -0.009396
2010/1/12 26.409867 25.336916 1.072951 0.521543 0.0 -0.0 0.018094 0.028255
2010/1/13 26.577433 25.137609 1.439824 0.581143 0.0 -0.0 0.006325 -0.007897
2010/1/14 28.420660 26.109231 2.311428 0.722738 0.0 -0.0 0.067054 0.037924
2010/1/15 28.253094 26.208885 2.044209 0.679327 0.0 -0.0 -0.005913 0.003810
# Strategy return: each leg is weighted 0.5. Positions are shifted by one row,
# so a row's return is multiplied by the position from the previous row.
data['strategy'] = 0.5*(data['position_1'].shift(1) * data['returns_1']) + 0.5*(data['position_2'].shift(1) * data['returns_2'])
# Cumulative return: cumulative sum of log returns, exponentiated; show the last row.
data[['returns_1','returns_2','strategy']].dropna().cumsum().apply(np.exp).tail(1)
returns_1 returns_2 strategy
date
2019/4/8 2.470158 3.837651 0.986754
# Plot the cumulative returns of both stocks and of the strategy.
data[['returns_1','returns_2','strategy']].dropna().cumsum().apply(np.exp).plot(figsize=(10, 6));

在這裏插入圖片描述

Pair trading 策略 - 小範圍時間(2013.6-2014.12)

# Load the second price file and index the rows by trade date.
data2 = pd.read_csv('pair-trade-data2.csv').set_index('date')
data2.head()
000568 000858
date
2013/6/3 20.719056 20.343053
2013/6/4 20.357220 20.060867
2013/6/5 20.514540 20.274644
2013/6/6 20.113374 20.172031
2013/6/7 19.704342 19.667508
# Plot every column of `data2` on one chart.
data2.plot(figsize=(8, 6));

在這裏插入圖片描述

# Naive assumption: the raw price difference is mean-reverting
# (not a statistically justified idea).
# Compute the spread from data2's own columns. The original read from `data`,
# which made this frame depend on index alignment with a different DataFrame.
data2['priceDelta'] = data2['000568'] - data2['000858']
data2.head()
000568 000858 priceDelta
date
2013/6/3 20.719056 20.343053 0.376004
2013/6/4 20.357220 20.060867 0.296353
2013/6/5 20.514540 20.274644 0.239896
2013/6/6 20.113374 20.172031 -0.058657
2013/6/7 19.704342 19.667508 0.036833
# Plot the spread and draw a horizontal line at its sample mean.
data2['priceDelta'].plot(figsize=(8, 6));
plt.ylabel('Spread')
plt.axhline(data2['priceDelta'].mean());

在這裏插入圖片描述

# Standardise the spread into a z-score: subtract the mean and divide by the
# population standard deviation (ddof=0, which is what np.std uses).
data2['zscore'] = (data2['priceDelta'] - data2['priceDelta'].mean()) / data2['priceDelta'].std(ddof=0)
data2.head()
000568 000858 priceDelta zscore
date
2013/6/3 20.719056 20.343053 0.376004 0.048513
2013/6/4 20.357220 20.060867 0.296353 0.000596
2013/6/5 20.514540 20.274644 0.239896 -0.033369
2013/6/6 20.113374 20.172031 -0.058657 -0.212979
2013/6/7 19.704342 19.667508 0.036833 -0.155532
# Number of rows whose z-score is above +1.5.
len(data2[data2['zscore'] > 1.5])
40
# Number of rows whose z-score is below -1.5.
len(data2[data2['zscore'] < -1.5])
16
# 'position_1' is the open/close signal for 000568:
#   |z-score| below 0.5 -> 0   (close the position)
#   z-score below -1.5  -> +1  (long 000568)
#   z-score above +1.5  -> -1  (short 000568)
#   anything else       -> NaN (no new signal on that row)
data2['position_1'] = np.select(
    [abs(data2['zscore']) < 0.5, data2['zscore'] < -1.5, data2['zscore'] > 1.5],
    [0, 1, -1],
    default=np.nan,
)
data2.head()
000568 000858 priceDelta zscore position_1
date
2013/6/3 20.719056 20.343053 0.376004 0.048513 0.0
2013/6/4 20.357220 20.060867 0.296353 0.000596 0.0
2013/6/5 20.514540 20.274644 0.239896 -0.033369 0.0
2013/6/6 20.113374 20.172031 -0.058657 -0.212979 0.0
2013/6/7 19.704342 19.667508 0.036833 -0.155532 0.0
# Forward-fill so each signal is carried until the next one appears; rows
# before the first signal are set to 0 (no position).
data2['position_1'] = data2['position_1'].ffill().fillna(0)
data2['position_1'].plot(ylim=[-1.1, 1.1], figsize=(10, 6));

在這裏插入圖片描述

# 'position_2' is the open/close signal for 000858 (opposite sign to 000568's).
data2['position_2'] = -np.sign(data2['position_1'])
data2['position_2'].plot(ylim=[-1.1, 1.1], figsize=(10, 6));

在這裏插入圖片描述

# Log return of each stock versus the previous row. The first row has no
# previous price, so its NaN is replaced with 0.
data2['returns_1'] = (np.log(data2['000568'] / data2['000568'].shift(1))).fillna(0)
data2['returns_2'] = (np.log(data2['000858'] / data2['000858'].shift(1))).fillna(0)
data2.head(10)
000568 000858 priceDelta zscore position_1 position_2 returns_1 returns_2
date
2013/6/3 20.719056 20.343053 0.376004 0.048513 0.0 -0.0 0.000000 0.000000
2013/6/4 20.357220 20.060867 0.296353 0.000596 0.0 -0.0 -0.017618 -0.013968
2013/6/5 20.514540 20.274644 0.239896 -0.033369 0.0 -0.0 0.007698 0.010600
2013/6/6 20.113374 20.172031 -0.058657 -0.212979 0.0 -0.0 -0.019749 -0.005074
2013/6/7 19.704342 19.667508 0.036833 -0.155532 0.0 -0.0 -0.020546 -0.025329
2013/6/13 19.562754 19.012515 0.550239 0.153334 0.0 -0.0 -0.007212 -0.033871
2013/6/14 19.617816 19.012515 0.605301 0.186459 0.0 -0.0 0.002811 0.000000
2013/6/17 19.255979 18.720423 0.535556 0.144501 0.0 -0.0 -0.018616 -0.015482
2013/6/18 19.405434 18.853192 0.552241 0.154539 0.0 -0.0 0.007731 0.007067
2013/6/19 19.956054 19.269202 0.686852 0.235521 0.0 -0.0 0.027979 0.021826
# Strategy return: each leg is weighted 0.5. Positions are shifted by one row,
# so a row's return is multiplied by the position from the previous row.
data2['strategy'] = 0.5*(data2['position_1'].shift(1) * data2['returns_1']) + 0.5*(data2['position_2'].shift(1) * data2['returns_2'])
# Cumulative return: cumulative sum of log returns, exponentiated; show the last row.
data2[['returns_1','returns_2','strategy']].dropna().cumsum().apply(np.exp).tail(1)
returns_1 returns_2 strategy
date
2014/12/31 0.892955 0.97347 1.12623
# Plot the cumulative returns of both stocks and of the strategy.
data2[['returns_1','returns_2','strategy']].dropna().cumsum().apply(np.exp).plot(figsize=(10, 6));

在這裏插入圖片描述

# Annualised return: mean per-row log return scaled by 252
# (presumably the number of trading days per year).
data2[['returns_1','returns_2','strategy']].dropna().mean() * 252
returns_1   -0.073915
returns_2   -0.017554
strategy     0.077608
dtype: float64
# Annualised risk (volatility): per-row standard deviation scaled by sqrt(252).
data2[['returns_1','returns_2','strategy']].dropna().std() * 252 ** 0.5
returns_1    0.300306
returns_2    0.280425
strategy     0.057016
dtype: float64
# Cumulative return of the strategy.
data2['cumret'] = data2['strategy'].dropna().cumsum().apply(np.exp)
# Running maximum of the cumulative return.
data2['cummax'] = data2['cumret'].cummax()
# Drawdown series: absolute gap between the running maximum and the current value.
drawdown = (data2['cummax'] - data2['cumret'])
# Maximum drawdown.
drawdown.max()
0.03645280148896235

Pair trading 策略 - 考慮時間序列平穩性

import pandas as pd
import numpy as np
import tushare as ts
import seaborn
from matplotlib import pyplot as plt
# The 'seaborn' style was renamed to 'seaborn-v0_8' in matplotlib 3.6 and the
# old name was later removed; fall back to the old name on older releases.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
%matplotlib inline

1. 數據準備

# Load the second price file and index the rows by trade date.
data3 = pd.read_csv('pair-trade-data2.csv').set_index('date')
data3.head()
000568 000858
date
2013/6/3 20.719056 20.343053
2013/6/4 20.357220 20.060867
2013/6/5 20.514540 20.274644
2013/6/6 20.113374 20.172031
2013/6/7 19.704342 19.667508
# Plot every column of `data3` on one chart.
data3.plot(figsize=(8,6));

在這裏插入圖片描述

2. 策略開發思路

data3.corr()  # Correlation matrix (pairwise correlation of the columns), not covariance
000568 000858
000568 1.000000 0.552409
000858 0.552409 1.000000
# Scatter plot of one price against the other to inspect the correlation visually.
# Use data3, the frame this section analyses; the original plotted and cleaned
# the unrelated `data` frame from the first section.
plt.figure(figsize =(10,8))
plt.title('Stock Correlation')
plt.plot(data3['000568'], data3['000858'], '.');
plt.xlabel('000568')
plt.ylabel('000858')
# Drop rows with missing values so the regression on data3 sees complete rows only.
data3.dropna(inplace = True)

在這裏插入圖片描述

# Linear regression between the two price series (assumes the white-noise
# error term is normally distributed). The first column is x, the second is y.
# A degree-1 np.polyfit returns [slope, intercept]; both are rounded to 2 decimals.
[slope, intercept] = np.polyfit(data3.iloc[:,0], data3.iloc[:,1], 1).round(2)      
slope,intercept 
(0.51, 7.82)
# Spread = second column minus its fitted value from the first column,
# i.e. the regression residual (up to the rounding of slope and intercept).
data3['spread'] = data3.iloc[:,1] - (data3.iloc[:,0]*slope + intercept)
data3.head()
000568 000858 spread
date
2013/6/3 20.719056 20.343053 1.956334
2013/6/4 20.357220 20.060867 1.858684
2013/6/5 20.514540 20.274644 1.992228
2013/6/6 20.113374 20.172031 2.094210
2013/6/7 19.704342 19.667508 1.798294
# Plot the regression-based spread.
data3['spread'].plot(figsize = (10,8),title = 'Price Spread');

在這裏插入圖片描述

# Standardise the spread into a z-score. Series.std() uses pandas' default
# ddof=1 (sample standard deviation), unlike np.std, which defaults to ddof=0.
data3['zscore'] = (data3['spread'] - data3['spread'].mean())/data3['spread'].std()
data3.head()
000568 000858 spread zscore
date
2013/6/3 20.719056 20.343053 1.956334 1.452385
2013/6/4 20.357220 20.060867 1.858684 1.382488
2013/6/5 20.514540 20.274644 1.992228 1.478078
2013/6/6 20.113374 20.172031 2.094210 1.551075
2013/6/7 19.704342 19.667508 1.798294 1.339261
# Plot the z-score with horizontal reference lines at +1.5, 0 and -1.5.
data3['zscore'].plot(figsize = (10,8),title = 'Z-score')
plt.axhline(1.5)
plt.axhline(0)
plt.axhline(-1.5)
<matplotlib.lines.Line2D at 0xcb62632e8>

在這裏插入圖片描述

產生交易信號

# Signal for the first leg (position_1 is later multiplied by the 000568 returns):
#   z-score above +1.5  -> +1
#   z-score below -1.5  -> -1
#   |z-score| below 0.5 -> 0 (close the position)
#   anything else       -> NaN (no new signal on that row)
data3['position_1'] = np.where(data3['zscore'] > 1.5, 1, np.nan)
data3['position_1'] = np.where(data3['zscore'] < -1.5, -1, data3['position_1'])
data3['position_1'] = np.where(abs(data3['zscore']) < 0.5, 0, data3['position_1'])
# Forward-fill so each signal is carried until the next one; rows before the
# first signal are set to 0 (no position).
data3['position_1'] = data3['position_1'].ffill().fillna(0)
data3['position_1'].plot(ylim=[-1.1, 1.1], figsize=(10, 6),title = 'Trading Signal_Uptrade');

在這裏插入圖片描述

# The second leg always takes the opposite sign of position_1.
data3['position_2'] = -np.sign(data3['position_1'])
data3['position_2'].plot(ylim=[-1.1, 1.1], figsize=(10, 6),title = 'Trading Signal_Downtrade');

在這裏插入圖片描述

3. 計算策略年化收益並可視化

# Log return of each stock versus the previous row. The first row stays NaN
# here and is removed by the dropna() calls below.
data3['returns_1'] = np.log(data3['000568'] / data3['000568'].shift(1))
data3['returns_2'] = np.log(data3['000858'] / data3['000858'].shift(1))
# Strategy return: each leg is weighted 0.5. Positions are shifted by one row,
# so a row's return is multiplied by the position from the previous row.
data3['strategy'] = 0.5*(data3['position_1'].shift(1) * data3['returns_1']) + 0.5*(data3['position_2'].shift(1) * data3['returns_2'])
# Cumulative return: cumulative sum of log returns, exponentiated; show the last row.
data3[['returns_1','returns_2','strategy']].dropna().cumsum().apply(np.exp).tail(1)
returns_1 returns_2 strategy
date
2014/12/31 0.892955 0.97347 1.174494
# Plot the cumulative returns of both stocks and of the strategy.
data3[['returns_1','returns_2','strategy']].dropna().cumsum().apply(np.exp).plot(figsize=(10, 8),title = 'Strategy_Backtesting');

在這裏插入圖片描述

# Annualised return: mean per-row log return scaled by 252
# (presumably the number of trading days per year).
data3[['returns_1','returns_2','strategy']].dropna().mean() * 252
returns_1   -0.073915
returns_2   -0.017554
strategy     0.105002
dtype: float64
# Annualised risk (volatility): per-row standard deviation scaled by sqrt(252).
data3[['returns_1','returns_2','strategy']].dropna().std() * 252 ** 0.5
returns_1    0.300306
returns_2    0.280425
strategy     0.068639
dtype: float64
# Cumulative return of the strategy.
data3['cumret'] = data3['strategy'].dropna().cumsum().apply(np.exp)
# Running maximum of the cumulative return.
data3['cummax'] = data3['cumret'].cummax()
# Drawdown series: absolute gap between the running maximum and the current value.
drawdown = (data3['cummax'] - data3['cumret'])
# Maximum drawdown.
drawdown.max()
0.038159777097367176

策略的思考

  1. 對多隻ETF進行配對交易,是很多實盤量化基金的交易策略;

策略的風險和問題:

  1. Spread不迴歸的風險:當市場結構發生重大改變時,依據歷史數據迴歸估計出的Spread可能不再向均值迴歸;

  2. 中國市場做空受到限制,策略中有部分做空的收益是無法獲得的;

  3. 迴歸係數需要Rebalancing;

  4. 策略沒有考慮交易成本和其他成本;


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章