配對交易策略 Pair Trading
0. 匯入函式庫
import pandas as pd
import numpy as np
import tushare as ts
import seaborn
from matplotlib import pyplot as plt
plt. style. use( 'seaborn' )
% matplotlib inline
# Read the price table for the two tickers and index it by the 'date' column.
data = pd.read_csv('pair-trade-data.csv').set_index('date')
# Preview the first rows.
data.head()
000568
000858
date
2010/1/4
27.488118
26.117536
2010/1/5
27.335123
26.391583
2010/1/6
26.941707
25.694008
2010/1/7
26.388011
24.913389
2010/1/8
26.825140
24.863562
# Plot both price series on one chart (trailing ';' hides the repr output).
data.plot(figsize=(8, 6));
2. 策略開發思路
# Raw spread of the pair: price of 000568 minus price of 000858.
data['priceDelta'] = data['000568'].sub(data['000858'])
data.head()
000568
000858
priceDelta
date
2010/1/4
27.488118
26.117536
1.370582
2010/1/5
27.335123
26.391583
0.943540
2010/1/6
26.941707
25.694008
1.247699
2010/1/7
26.388011
24.913389
1.474622
2010/1/8
26.825140
24.863562
1.961578
# Chart the spread and draw a horizontal line at its full-sample mean.
price_delta = data['priceDelta']
price_delta.plot(figsize=(8, 6));
plt.ylabel('Spread')
plt.axhline(price_delta.mean());
# Standardise the spread: subtract its mean and divide by its standard
# deviation, both taken over the whole sample via NumPy.
price_delta = data['priceDelta']
data['zscore'] = (price_delta - np.mean(price_delta)) / np.std(price_delta)
data.head()
000568
000858
priceDelta
zscore
date
2010/1/4
27.488118
26.117536
1.370582
0.569895
2010/1/5
27.335123
26.391583
0.943540
0.500520
2010/1/6
26.941707
25.694008
1.247699
0.549932
2010/1/7
26.388011
24.913389
1.474622
0.586796
2010/1/8
26.825140
24.863562
1.961578
0.665903
# Number of rows whose z-score is above the 1.5 entry threshold.
int((data['zscore'] > 1.5).sum())
17
# Raw signal for 000568 (the spread is 000568 - 000858):
#   z >  1.5  -> -1 (short 000568)
#   z < -1.5  -> +1 (long 000568)
#   |z| < 0.5 ->  0 (flat)
# Any other z-score leaves NaN, meaning "no new signal on this row".
# The three conditions are mutually exclusive, so their order is irrelevant.
zscore = data['zscore']
data['position_1'] = np.select(
    [zscore > 1.5, zscore < -1.5, abs(zscore) < 0.5],
    [-1, 1, 0],
    default=np.nan,
)
data.head()
000568
000858
priceDelta
zscore
position_1
date
2010/1/4
27.488118
26.117536
1.370582
0.569895
NaN
2010/1/5
27.335123
26.391583
0.943540
0.500520
NaN
2010/1/6
26.941707
25.694008
1.247699
0.549932
NaN
2010/1/7
26.388011
24.913389
1.474622
0.586796
NaN
2010/1/8
26.825140
24.863562
1.961578
0.665903
NaN
產生交易信號
# Carry the most recent signal forward over rows with no new signal; rows
# before the first signal become 0 (no position).
held_signal = data['position_1'].ffill()
data['position_1'] = held_signal.fillna(0)
data['position_1'].plot(ylim=[-1.1, 1.1], figsize=(10, 6));
# The 000858 leg always takes the opposite side of the 000568 leg.
data['position_2'] = np.negative(np.sign(data['position_1']))
data['position_2'].plot(ylim=[-1.1, 1.1], figsize=(10, 6));
3. 計算策略年化收益並可視化
# Log returns of each ticker versus the previous row; the first row has no
# predecessor, so its NaN is replaced with 0.
price_1 = data['000568']
price_2 = data['000858']
data['returns_1'] = np.log(price_1 / price_1.shift(1)).fillna(0)
data['returns_2'] = np.log(price_2 / price_2.shift(1)).fillna(0)
data.head(10)
000568
000858
priceDelta
zscore
position_1
position_2
returns_1
returns_2
date
2010/1/4
27.488118
26.117536
1.370582
0.569895
0.0
-0.0
0.000000
0.000000
2010/1/5
27.335123
26.391583
0.943540
0.500520
0.0
-0.0
-0.005581
0.010438
2010/1/6
26.941707
25.694008
1.247699
0.549932
0.0
-0.0
-0.014497
-0.026787
2010/1/7
26.388011
24.913389
1.474622
0.586796
0.0
-0.0
-0.020766
-0.030852
2010/1/8
26.825140
24.863562
1.961578
0.665903
0.0
-0.0
0.016430
-0.002002
2010/1/11
25.936311
24.631037
1.305274
0.559285
0.0
-0.0
-0.033696
-0.009396
2010/1/12
26.409867
25.336916
1.072951
0.521543
0.0
-0.0
0.018094
0.028255
2010/1/13
26.577433
25.137609
1.439824
0.581143
0.0
-0.0
0.006325
-0.007897
2010/1/14
28.420660
26.109231
2.311428
0.722738
0.0
-0.0
0.067054
0.037924
2010/1/15
28.253094
26.208885
2.044209
0.679327
0.0
-0.0
-0.005913
0.003810
# Each leg earns the current row's return on the position held at the end of
# the previous row (shift(1)); the two legs are weighted 50/50.
leg_1 = data['position_1'].shift(1) * data['returns_1']
leg_2 = data['position_2'].shift(1) * data['returns_2']
data['strategy'] = 0.5 * leg_1 + 0.5 * leg_2
# Final cumulative gross return: exp of the summed log returns.
np.exp(data[['returns_1', 'returns_2', 'strategy']].dropna().cumsum()).tail(1)
returns_1
returns_2
strategy
date
2019/4/8
2.470158
3.837651
0.986754
# Cumulative gross return of both tickers and of the strategy.
np.exp(data[['returns_1', 'returns_2', 'strategy']].dropna().cumsum()).plot(figsize=(10, 6));
Pair trading 策略 - 小範圍時間(2013.6-2014.12)
# Read the second price table and index it by the 'date' column.
data2 = pd.read_csv('pair-trade-data2.csv').set_index('date')
data2.head()
000568
000858
date
2013/6/3
20.719056
20.343053
2013/6/4
20.357220
20.060867
2013/6/5
20.514540
20.274644
2013/6/6
20.113374
20.172031
2013/6/7
19.704342
19.667508
# Plot both price series of the shorter sample.
data2.plot(figsize=(8, 6));
# Raw spread of the pair for this sample: 000568 minus 000858.
# Computed from data2 itself; the earlier version read the columns from the
# other frame (`data`) and only worked through index alignment on shared dates.
data2['priceDelta'] = data2['000568'] - data2['000858']
data2.head()
000568
000858
priceDelta
date
2013/6/3
20.719056
20.343053
0.376004
2013/6/4
20.357220
20.060867
0.296353
2013/6/5
20.514540
20.274644
0.239896
2013/6/6
20.113374
20.172031
-0.058657
2013/6/7
19.704342
19.667508
0.036833
# Chart the spread and draw a horizontal line at its sample mean.
price_delta2 = data2['priceDelta']
price_delta2.plot(figsize=(8, 6));
plt.ylabel('Spread')
plt.axhline(price_delta2.mean());
# Standardise the spread with its whole-sample mean and standard deviation
# (both computed through NumPy).
price_delta2 = data2['priceDelta']
data2['zscore'] = (price_delta2 - np.mean(price_delta2)) / np.std(price_delta2)
data2.head()
000568
000858
priceDelta
zscore
date
2013/6/3
20.719056
20.343053
0.376004
0.048513
2013/6/4
20.357220
20.060867
0.296353
0.000596
2013/6/5
20.514540
20.274644
0.239896
-0.033369
2013/6/6
20.113374
20.172031
-0.058657
-0.212979
2013/6/7
19.704342
19.667508
0.036833
-0.155532
# Number of rows whose z-score is above +1.5.
int((data2['zscore'] > 1.5).sum())
40
# Number of rows whose z-score is below -1.5.
int((data2['zscore'] < -1.5).sum())
16
# Raw signal for 000568 (the spread is 000568 - 000858):
#   z >  1.5  -> -1 (short 000568)
#   z < -1.5  -> +1 (long 000568)
#   |z| < 0.5 ->  0 (flat)
# Any other z-score leaves NaN ("no new signal"). The conditions cannot
# overlap, so evaluating them together is equivalent to applying them in turn.
zscore2 = data2['zscore']
data2['position_1'] = np.select(
    [zscore2 > 1.5, zscore2 < -1.5, abs(zscore2) < 0.5],
    [-1, 1, 0],
    default=np.nan,
)
data2.head()
000568
000858
priceDelta
zscore
position_1
date
2013/6/3
20.719056
20.343053
0.376004
0.048513
0.0
2013/6/4
20.357220
20.060867
0.296353
0.000596
0.0
2013/6/5
20.514540
20.274644
0.239896
-0.033369
0.0
2013/6/6
20.113374
20.172031
-0.058657
-0.212979
0.0
2013/6/7
19.704342
19.667508
0.036833
-0.155532
0.0
# Carry the latest signal forward; rows before the first signal become 0.
held_signal2 = data2['position_1'].ffill()
data2['position_1'] = held_signal2.fillna(0)
data2['position_1'].plot(ylim=[-1.1, 1.1], figsize=(10, 6));
# The 000858 leg mirrors the 000568 leg with the opposite sign.
data2['position_2'] = np.negative(np.sign(data2['position_1']))
data2['position_2'].plot(ylim=[-1.1, 1.1], figsize=(10, 6));
# Row-over-row log returns per ticker; the first row's NaN is set to 0.
price2_1 = data2['000568']
price2_2 = data2['000858']
data2['returns_1'] = np.log(price2_1 / price2_1.shift(1)).fillna(0)
data2['returns_2'] = np.log(price2_2 / price2_2.shift(1)).fillna(0)
data2.head(10)
000568
000858
priceDelta
zscore
position_1
position_2
returns_1
returns_2
date
2013/6/3
20.719056
20.343053
0.376004
0.048513
0.0
-0.0
0.000000
0.000000
2013/6/4
20.357220
20.060867
0.296353
0.000596
0.0
-0.0
-0.017618
-0.013968
2013/6/5
20.514540
20.274644
0.239896
-0.033369
0.0
-0.0
0.007698
0.010600
2013/6/6
20.113374
20.172031
-0.058657
-0.212979
0.0
-0.0
-0.019749
-0.005074
2013/6/7
19.704342
19.667508
0.036833
-0.155532
0.0
-0.0
-0.020546
-0.025329
2013/6/13
19.562754
19.012515
0.550239
0.153334
0.0
-0.0
-0.007212
-0.033871
2013/6/14
19.617816
19.012515
0.605301
0.186459
0.0
-0.0
0.002811
0.000000
2013/6/17
19.255979
18.720423
0.535556
0.144501
0.0
-0.0
-0.018616
-0.015482
2013/6/18
19.405434
18.853192
0.552241
0.154539
0.0
-0.0
0.007731
0.007067
2013/6/19
19.956054
19.269202
0.686852
0.235521
0.0
-0.0
0.027979
0.021826
# Yesterday's position (shift(1)) earns today's log return on each leg;
# the strategy return is the equal-weighted sum of both legs.
leg2_1 = data2['position_1'].shift(1) * data2['returns_1']
leg2_2 = data2['position_2'].shift(1) * data2['returns_2']
data2['strategy'] = 0.5 * leg2_1 + 0.5 * leg2_2
# Final cumulative gross return: exp of the summed log returns.
np.exp(data2[['returns_1', 'returns_2', 'strategy']].dropna().cumsum()).tail(1)
returns_1
returns_2
strategy
date
2014/12/31
0.892955
0.97347
1.12623
# Cumulative gross return of both tickers and of the strategy.
np.exp(data2[['returns_1', 'returns_2', 'strategy']].dropna().cumsum()).plot(figsize=(10, 6));
# Mean per-row log return scaled by 252 (presumably trading days per year).
data2[['returns_1', 'returns_2', 'strategy']].dropna().mean().mul(252)
returns_1 -0.073915
returns_2 -0.017554
strategy 0.077608
dtype: float64
# Per-row standard deviation scaled by sqrt(252) (presumably an annualisation).
data2[['returns_1', 'returns_2', 'strategy']].dropna().std().mul(252 ** 0.5)
returns_1 0.300306
returns_2 0.280425
strategy 0.057016
dtype: float64
# Cumulative gross return of the strategy and its running peak.
data2['cumret'] = np.exp(data2['strategy'].dropna().cumsum())
data2['cummax'] = data2['cumret'].cummax()
# Drawdown is the absolute gap between the running peak and the current
# cumulative return (not divided by the peak); report the largest gap.
drawdown = data2['cummax'].sub(data2['cumret'])
drawdown.max()
0.03645280148896235
Pair trading 策略 - 考慮時間序列平穩性
import pandas as pd
import numpy as np
import tushare as ts
import seaborn
from matplotlib import pyplot as plt
plt. style. use( 'seaborn' )
% matplotlib inline
1. 數據準備
# Read the price table again into a fresh frame, indexed by the 'date' column.
data3 = pd.read_csv('pair-trade-data2.csv').set_index('date')
data3.head()
000568
000858
date
2013/6/3
20.719056
20.343053
2013/6/4
20.357220
20.060867
2013/6/5
20.514540
20.274644
2013/6/6
20.113374
20.172031
2013/6/7
19.704342
19.667508
# Plot both price series.
data3.plot(figsize=(8, 6));
2. 策略開發思路
# Pairwise correlation matrix of the two price columns.
data3.corr()
000568
000858
000568
1.000000
0.552409
000858
0.552409
1.000000
# Scatter 000858 against 000568 for the sample analysed in this section.
# The earlier version plotted and cleaned `data` (a different frame loaded
# from another file) instead of `data3`.
plt.figure(figsize=(10, 8))
plt.title('Stock Correlation')
plt.plot(data3['000568'], data3['000858'], '.');
plt.xlabel('000568')
plt.ylabel('000858')
# Drop incomplete rows from the frame that is used in the following steps.
data3.dropna(inplace=True)
# Degree-1 least-squares fit of column 1 (000858) on column 0 (000568);
# both coefficients are rounded to two decimals before use.
fit_coeffs = np.polyfit(data3.iloc[:, 0], data3.iloc[:, 1], 1).round(2)
slope, intercept = fit_coeffs
slope, intercept
(0.51, 7.82)
# Spread = column 1 minus the fitted line slope * column 0 + intercept.
fitted = data3.iloc[:, 0] * slope + intercept
data3['spread'] = data3.iloc[:, 1] - fitted
data3.head()
000568
000858
spread
date
2013/6/3
20.719056
20.343053
1.956334
2013/6/4
20.357220
20.060867
1.858684
2013/6/5
20.514540
20.274644
1.992228
2013/6/6
20.113374
20.172031
2.094210
2013/6/7
19.704342
19.667508
1.798294
# Chart the regression spread.
data3['spread'].plot(figsize=(10, 8), title='Price Spread');
# Standardise the spread with its whole-sample mean and pandas' Series.std.
spread = data3['spread']
data3['zscore'] = (spread - spread.mean()) / spread.std()
data3.head()
000568
000858
spread
zscore
date
2013/6/3
20.719056
20.343053
1.956334
1.452385
2013/6/4
20.357220
20.060867
1.858684
1.382488
2013/6/5
20.514540
20.274644
1.992228
1.478078
2013/6/6
20.113374
20.172031
2.094210
1.551075
2013/6/7
19.704342
19.667508
1.798294
1.339261
# Chart the z-score with reference lines at +1.5, 0 and -1.5.
data3['zscore'].plot(figsize=(10, 8), title='Z-score')
plt.axhline(1.5)
plt.axhline(0)
plt.axhline(-1.5)
<matplotlib.lines.Line2D at 0xcb62632e8>
產生交易信號
# Signal for 000568. The spread here is 000858 minus its fitted value, so:
#   z >  1.5  -> +1 (long 000568)
#   z < -1.5  -> -1 (short 000568)
#   |z| < 0.5 ->  0 (flat)
# Any other z-score yields NaN ("no new signal"); the conditions cannot overlap.
zscore3 = data3['zscore']
raw_signal = np.select(
    [zscore3 > 1.5, zscore3 < -1.5, abs(zscore3) < 0.5],
    [1, -1, 0],
    default=np.nan,
)
data3['position_1'] = raw_signal
# Hold the latest signal until the next one; flat before the first signal.
data3['position_1'] = data3['position_1'].ffill().fillna(0)
data3['position_1'].plot(ylim=[-1.1, 1.1], figsize=(10, 6), title='Trading Signal_Uptrade');
# The 000858 leg takes the opposite side of the 000568 leg.
data3['position_2'] = np.negative(np.sign(data3['position_1']))
data3['position_2'].plot(ylim=[-1.1, 1.1], figsize=(10, 6), title='Trading Signal_Downtrade');
3. 計算策略年化收益並可視化
# Row-over-row log returns per ticker (the first row stays NaN here).
price3_1 = data3['000568']
price3_2 = data3['000858']
data3['returns_1'] = np.log(price3_1 / price3_1.shift(1))
data3['returns_2'] = np.log(price3_2 / price3_2.shift(1))
# Previous row's position earns the current row's return; legs weighted 50/50.
leg3_1 = data3['position_1'].shift(1) * data3['returns_1']
leg3_2 = data3['position_2'].shift(1) * data3['returns_2']
data3['strategy'] = 0.5 * leg3_1 + 0.5 * leg3_2
# Final cumulative gross return: exp of the summed log returns.
np.exp(data3[['returns_1', 'returns_2', 'strategy']].dropna().cumsum()).tail(1)
returns_1
returns_2
strategy
date
2014/12/31
0.892955
0.97347
1.174494
# Cumulative gross return of both tickers and of the strategy.
np.exp(data3[['returns_1', 'returns_2', 'strategy']].dropna().cumsum()).plot(figsize=(10, 8), title='Strategy_Backtesting');
# Mean per-row log return scaled by 252 (presumably trading days per year).
data3[['returns_1', 'returns_2', 'strategy']].dropna().mean().mul(252)
returns_1 -0.073915
returns_2 -0.017554
strategy 0.105002
dtype: float64
# Per-row standard deviation scaled by sqrt(252) (presumably an annualisation).
data3[['returns_1', 'returns_2', 'strategy']].dropna().std().mul(252 ** 0.5)
returns_1 0.300306
returns_2 0.280425
strategy 0.068639
dtype: float64
# Cumulative gross return of the strategy and its running peak.
data3['cumret'] = np.exp(data3['strategy'].dropna().cumsum())
data3['cummax'] = data3['cumret'].cummax()
# Drawdown is the absolute gap between the running peak and the current
# cumulative return (not divided by the peak); report the largest gap.
drawdown = data3['cummax'].sub(data3['cumret'])
drawdown.max()
0.038159777097367176
策略的思考
對多隻ETF進行配對交易,是很多實盤量化基金的交易策略;
策略的風險和問題:
Spread 不回復均值的風險:當市場結構發生重大改變時,依歷史數據迴歸得到的 Spread 可能不再回到均值,策略將面臨重大虧損風險;
中國市場做空受到限制,策略中有部分做空的收益是無法獲得的;
迴歸係數需要Rebalancing;
策略沒有考慮交易成本和其他成本。