# 前置準備機器學習應用-決策樹二元分類算法
import matplotlib.pyplot as plt
def showchart(df, evalparm, barData, lineData, yMin, yMax):
    """Draw a bar chart of one column with a second column overlaid as a line.

    Args:
        df: DataFrame holding both columns to be charted.
        evalparm: Text used as the chart title and as the x-axis label.
        barData: Name of the column drawn as bars; also the left y-axis label.
        lineData: Name of the column drawn as a red line on a second y axis.
        yMin: Lower limit of the left (bar) y axis.
        yMax: Upper limit of the left (bar) y axis.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    label_size = 12

    # Left axis: the bar series, clipped to the requested y range.
    bar_axis = df[barData].plot(
        kind='bar',
        title=evalparm,
        figsize=(10, 6),
        legend=True,
        fontsize=label_size,
    )
    bar_axis.set_ylim([yMin, yMax])
    bar_axis.set_xlabel(evalparm, fontsize=label_size)
    bar_axis.set_ylabel(barData, fontsize=label_size)

    # Right axis: shares the x axis with the bars but keeps its own y scale,
    # so the line series is not constrained by yMin/yMax.
    line_axis = bar_axis.twinx()
    line_axis.plot(
        df[lineData].values,
        linestyle='-',
        marker='o',
        linewidth=2.0,
        color='r',
    )

    plt.show()
# Define the evalParameter function
def evalParameter(trainData, validationData, evalparm,
                  impurityList, maxDepthList, maxBinsList):
    """Train a model for every parameter combination and chart the results.

    ``trainEvaluateModel`` is called once per (impurity, maxDepth, maxBins)
    combination; the results are collected into a pandas DataFrame indexed by
    the candidate values of the parameter named by ``evalparm`` and handed to
    ``showchart`` (AUC as bars, duration as a line).

    Args:
        trainData: Training data, passed through to ``trainEvaluateModel``.
        validationData: Validation data, passed through to
            ``trainEvaluateModel``.
        evalparm: Name of the parameter under study; must be ``"impurity"``,
            ``"maxDepth"`` or ``"maxBins"``.
        impurityList: Candidate impurity values.
        maxDepthList: Candidate maximum depths.
        maxBinsList: Candidate maximum bin counts.

    Returns:
        None. The chart is displayed by ``showchart``.

    Raises:
        ValueError: If ``evalparm`` is not one of the supported names. This is
            checked before any model is trained, so a misspelled name fails
            immediately instead of after the whole training grid has run.
    """
    # Pick the candidate list that labels the rows for the evaluated parameter.
    candidates = {
        "impurity": impurityList,
        "maxDepth": maxDepthList,
        "maxBins": maxBinsList,
    }
    if evalparm not in candidates:
        raise ValueError(
            "evalparm must be one of 'impurity', 'maxDepth' or 'maxBins', "
            "got %r" % (evalparm,))
    IndexList = list(candidates[evalparm])

    # Train and evaluate one model per parameter combination.
    metrics = [trainEvaluateModel(trainData, validationData,
                                  impurity, maxDepth, maxBins)
               for impurity in impurityList
               for maxDepth in maxDepthList
               for maxBins in maxBinsList]

    # Convert to a pandas DataFrame.
    # NOTE(review): assumes each trainEvaluateModel result is a 6-item record
    # matching the columns below, and that only the evaluated list holds more
    # than one value so the row count equals len(IndexList) -- confirm.
    df = pd.DataFrame(metrics, index=IndexList,
                      columns=['AUC', 'duration', 'impurity',
                               'maxDepth', 'maxBins', 'model'])

    # Display the chart: AUC bars (y axis 0.5-0.7) with duration as a line.
    showchart(df, evalparm, 'AUC', 'duration', 0.5, 0.7)
# Run 1: compare the two impurity measures while holding maxDepth and
# maxBins fixed at 10.
# NOTE(review): trainData / validationData are expected to be defined earlier
# in the file -- confirm.
evalParameter(
    trainData,
    validationData,
    evalparm="impurity",
    impurityList=["gini", "entropy"],
    maxDepthList=[10],
    maxBinsList=[10],
)

# Run 2: sweep the tree depth with impurity fixed to "gini" and maxBins
# fixed at 10.
evalParameter(
    trainData,
    validationData,
    evalparm="maxDepth",
    impurityList=["gini"],
    maxDepthList=[3, 5, 10, 15, 20, 25],
    maxBinsList=[10],
)