
第十一講 使用神經網絡進行客戶流失預警

from sklearn.neural_network import MLPClassifier
%matplotlib inline
import os
import numpy as np
from scipy import stats
import pandas as pd
import sklearn.cross_validation as cross_validation
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

# Remove the cap on displayed columns so wide DataFrames print in full
# instead of having their middle columns elided.
pd.set_option('display.max_columns', None)
/home/quant/anaconda2/lib/python2.7/site-packages/sklearn/ DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)
# Load the telecom churn dataset from a CSV file given by relative path.
data = pd.read_csv('telecom_churn.csv')
subscriberID churn gender AGE edu_class incomeCode duration feton peakMinAv peakMinDiff posTrend negTrend nrProm prom curPlan avgplan planChange posPlanChange negPlanChange call_10000
0 19164958 1 0 20 2 12 16 0 113.666667 -8.0 0 1 0 0 1 1 0 0 0 0
1 39244924 1 1 20 0 21 5 0 274.000000 -371.0 0 1 2 1 3 2 2 1 0 1
2 39578413 1 0 11 1 47 3 0 392.000000 -784.0 0 1 0 0 3 3 0 0 0 1
3 40992265 1 0 43 0 4 12 0 31.000000 -76.0 0 1 2 1 3 3 0 0 0 1
4 43061957 1 1 60 0 9 14 0 129.333333 -334.0 0 1 0 0 3 3 0 0 0 0
# sklearn.cross_validation was deprecated in 0.18 and removed in 0.20; the
# same helper is provided by sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

# Hold out 1000 rows as the test set; the remaining rows form the training set.
train, test = train_test_split(data, test_size=1000)

# Select the feature columns by position: from the 4th column up to, but not
# including, the last one. `.ix` has been removed from pandas; `.iloc` is the
# explicit positional indexer.
train_X = train.iloc[:, 3:-1]
test_X = test.iloc[:, 3:-1]

# Fit the min-max scaler on the training features only, then apply that same
# fitted transformation to both the training and the test features.
scaler = preprocessing.MinMaxScaler().fit(train_X)
train_X = scaler.transform(train_X)
test_X = scaler.transform(test_X)

# Convert the labels to plain NumPy arrays (the original note says this was
# needed for later pybrain modelling). `.values` replaces the removed
# `Series.get_values()`.
train_Y = train['churn'].values
test_Y = test['churn'].values

# Multi-layer perceptron with one hidden layer of 100 units, trained with the
# L-BFGS solver; random_state fixes the random initialisation.
# `(100,)` is a real one-element tuple; the previous `(100)` was just the
# integer 100 in parentheses.
clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                    hidden_layer_sizes=(100,), random_state=1)
# The `clf.fit(train_X` part of this call was missing, leaving unbalanced
# parentheses. fit() returns the estimator, so as the last expression of the
# cell it echoes the fitted model's parameters.
clf.fit(train_X, train_Y)
MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=100, learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
       solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)


# Predict class labels for the held-out rows. `test_Y_pred` was used here and
# in later cells without ever being assigned.
test_Y_pred = clf.predict(test_X)
# Confusion matrix: rows are the true labels, columns the predicted labels.
pd.crosstab(test_Y, test_Y_pred)
col_0 0 1
0 450 94
1 85 371
# The module path was missing (`from import Validator` is a syntax error).
# NOTE(review): restored as PyBrain's validation module, based on the pybrain
# mention in the label-preparation step -- confirm against the original notebook.
from pybrain.tools.validation import Validator

# Hit rate of the predictions against the true labels (overall accuracy).
Validator.classificationPerformance(test_Y_pred, test_Y)
import sklearn.metrics as metrics
# Per-class precision, recall, F1 and support on the test set. print with
# parentheses and a single argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3.
print(metrics.classification_report(test_Y, test_Y_pred))
             precision    recall  f1-score   support

          0       0.84      0.83      0.83       544
          1       0.80      0.81      0.81       456

avg / total       0.82      0.82      0.82      1000

ROC Curve

# roc_curve needs continuous scores rather than hard labels, so take the
# predicted probability of the positive class (second predict_proba column).
# These two arrays were referenced below without ever being assigned.
test_est_p = clf.predict_proba(test_X)[:, 1]
train_est_p = clf.predict_proba(train_X)[:, 1]

# False-positive rate, true-positive rate and thresholds for each data set.
fpr_test, tpr_test, th_test = metrics.roc_curve(test_Y, test_est_p)
fpr_train, tpr_train, th_train = metrics.roc_curve(train_Y, train_est_p)
# Red: held-out test set. Black: training set.
plt.plot(fpr_test, tpr_test, color='red')
plt.plot(fpr_train, tpr_train, color='black')
plt.title('ROC curve')
<matplotlib.text.Text at 0x7f8847226990>


還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.