1,Graphviz安装
[1]安装包下载地址见此链接。[科学上网]下载速度比较快
[2]下载完msi之后,安装在这个anaconda的路径下。我的路径是
E:\anaconda1\Lib\site-packages\
然后按提示安装即可。首先测试一下 Python 是否能够 import graphviz。下面截图说明可以用。
然后找一下Graphviz.exe的路径,直接在开始界面找一下路径就行了[anaconda包太多了不好找],路径是
E:\anaconda1\Lib\site-packages\bin\
把上面的路径添加到系统变量中。然后再测试这个是否安装好。
dot -version 下面这样就是可以用。
2,决策树的使用
没办法提供数据,请自己拿鸢尾花(iris)数据集试一试吧。
import pandas as pd
from sklearn import preprocessing
from sklearn.decomposition import PCA
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn import metrics as ms
from sklearn import tree
import graphviz
# Load the dataset; the CSV must contain a 'label' column used as the target.
data = pd.read_csv('C:\\Users\\happy\\Desktop\\t.csv')
y = data['label']
# Positional slicing on a DataFrame needs .iloc -- plain data[:, 1:30] raises
# an error. Presumably columns 1..29 hold the features; verify against the CSV.
x = data.iloc[:, 1:30]
######## Preprocessing #############
# Rescale every feature with min-max scaling before the PCA step.
min_max_scaler = preprocessing.MinMaxScaler()
X_train = min_max_scaler.fit_transform(x)
pca = PCA(n_components=20)  # number of components kept by the PCA reduction
X_train = pca.fit_transform(X_train)
########## Shuffle and split #################
def dataset_split(X_train, y, test_size=0.1, random_state=0):
    """Split samples and labels into training and test subsets.

    Thin wrapper around ``sklearn.model_selection.train_test_split``.

    Args:
        X_train: Feature matrix holding all samples.
        y: Labels aligned with the rows of ``X_train``.
        test_size: Value forwarded to ``train_test_split`` as ``test_size``
            (defaults to 0.1, the value the script originally hard-coded).
        random_state: Seed forwarded to ``train_test_split`` (defaults to 0,
            the value the script originally hard-coded).

    Returns:
        A tuple ``(x_train, x_test, y_train, y_test)``: training samples,
        test samples, training labels, test labels.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        X_train, y, test_size=test_size, random_state=random_state
    )
    return x_train, x_test, y_train, y_test
import os  # required for the PATH update below; the script never imported it

x_train, x_test, y_train, y_test = dataset_split(X_train, y)
############### Model ###########
# Fit a decision tree that uses entropy as the split criterion.
dtc = tree.DecisionTreeClassifier(criterion="entropy")
clf = dtc.fit(x_train, y_train)
# Predict once and reuse the result for the metric.
y_pred = clf.predict(x_test)
print('精确率', ms.precision_score(y_test, y_pred, average='micro'))
# out_file=None makes export_graphviz return the DOT source as a string.
dot_data = tree.export_graphviz(clf, out_file=None)
graph = graphviz.Source(dot_data)
# Extend PATH before rendering.
# NOTE(review): this appends the Desktop folder; presumably the Graphviz
# bin directory (the one containing dot.exe) is what is needed -- confirm.
os.environ["PATH"] += os.pathsep + 'C:\\Users\\happy\\Desktop\\'
# Render the tree to a file named "test" and open it in the default viewer.
graph.render("test", view=True)
最后的图[未显示全部]: