from sklearn.neural_network import MLPClassifier
%matplotlib inline
import os
import numpy as np
from scipy import stats
import pandas as pd
import sklearn.cross_validation as cross_validation
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
#os.chdir('Q:/data')
# Lift the column limit so that every column is shown when a DataFrame
# is displayed.
pd.options.display.max_columns = None
/home/quant/anaconda2/lib/python2.7/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
"This module will be removed in 0.20.", DeprecationWarning)
# Read the churn dataset from a CSV file located relative to the current
# working directory, then preview its first five rows.
data = pd.read_csv(filepath_or_buffer='telecom_churn.csv')
data.head(n=5)
subscriberID
churn
gender
AGE
edu_class
incomeCode
duration
feton
peakMinAv
peakMinDiff
posTrend
negTrend
nrProm
prom
curPlan
avgplan
planChange
posPlanChange
negPlanChange
call_10000
0
19164958
1
0
20
2
12
16
0
113.666667
-8.0
0
1
0
0
1
1
0
0
0
0
1
39244924
1
1
20
0
21
5
0
274.000000
-371.0
0
1
2
1
3
2
2
1
0
1
2
39578413
1
0
11
1
47
3
0
392.000000
-784.0
0
1
0
0
3
3
0
0
0
1
3
40992265
1
0
43
0
4
12
0
31.000000
-76.0
0
1
2
1
3
3
0
0
0
1
4
43061957
1
1
60
0
9
14
0
129.333333
-334.0
0
1
0
0
3
3
0
0
0
0
# Random sampling: split the data into a training set and a test set.
# An integer test_size is the absolute number of rows placed in the test
# set (1000 here); the remaining rows form the training set.
#
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed
# in 0.20; sklearn.model_selection provides the same train_test_split.
# NOTE(review): the legacy `import sklearn.cross_validation` at the top of
# the file is no longer needed by this cell and should be dropped once no
# other cell relies on it.
from sklearn.model_selection import train_test_split

train, test = train_test_split(data, test_size=1000)