關注微信公共號:小程在線
關注CSDN博客:程志偉的博客
R版本:3.6.1
e1071包:用於支持向量機模型
svm函數:利用數據構建支持向量機模型
> library('e1071')
Warning message:
程輯包‘e1071’是用R版本3.6.2 來建造的
> setwd('G:\\R語言\\大三下半年\\數據挖掘:R語言實戰\\')
> data("iris")
> summary(iris)
Sepal.Length Sepal.Width Petal.Length Petal.Width
Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
Median :5.800 Median :3.000 Median :4.350 Median :1.300
Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
Species
setosa :50
versicolor:50
virginica :50
> ######################### 應用案例 ######################
#第一種格式
> data("iris")
> model <- svm(Species ~ . ,data = iris)
> summary(model)
Call:
svm(formula = Species ~ ., data = iris)
Parameters:
SVM-Type: C-classification
SVM-Kernel: radial
cost: 1
Number of Support Vectors: 51
( 8 22 21 )
Number of Classes: 3
Levels:
setosa versicolor virginica
#第二種格式
> x=iris[,-5]
> y=iris[,5]
> model <- svm(x,y,kernel = "radial", gamma = if(is.vector(x)) 1 else 1/ncol(x))
> summary(model)
Call:
svm.default(x = x, y = y, kernel = "radial", gamma = if (is.vector(x)) 1 else 1/ncol(x))
Parameters:
SVM-Type: C-classification
SVM-Kernel: radial
cost: 1
Number of Support Vectors: 51
( 8 22 21 )
Number of Classes: 3
Levels:
setosa versicolor virginica
#預測判別
> x=iris[,1:4]#確認需要進行預測的樣本特徵矩陣
> prd=predict(model,x)#根據模型model對x數據進行預測
> prd[sample(1:150,5)]#隨機挑選5個預測結果進行展示
97 16 38 100 4
versicolor setosa setosa versicolor setosa
Levels: setosa versicolor virginica
#模型預測精度展示
> table(prd,y)
y
prd setosa versicolor virginica
setosa 50 0 0
versicolor 0 48 2
virginica 0 2 48
#綜合建模
> attach(iris)#將數據集iris按列單獨確認爲向量
> x=subset(iris,select=-Species)#除去Species
> y=Species
> type=c("C-classification","nu-classification","one-classification")#確定要使用的分類方式
> kernel=c("linear","polynomial","radial","sigmoid")#確定要使用的核函數
> pred=array(0,dim=c(150,3,4))#初始化預測結果矩陣的三維長度爲150,3,4
> accuracy=matrix(0,3,4)#初始化模型精準度矩陣的兩維分別爲3,4
> yy=as.integer(y)#將結果變量數量化爲1,2,3
> for(i in 1:3)#確認i影響的維度代表分類方式
+ {
+ for(j in 1:4)#確認j影響的維度代表核函數
+ {
+ pred[,i,j]=predict(svm(x,y,type=type[i],kernel=kernel[j]),x)#對每一模型進行預測
+ if(i>2) accuracy[i,j]=sum(pred[,i,j]!=1)
+ else accuracy[i,j]=sum(pred[,i,j]!=yy)
+ }
+ }
Warning messages:
1: In Ops.factor(yorig, ret$fitted) : ‘-’ not meaningful for factors
2: In Ops.factor(yorig, ret$fitted) : ‘-’ not meaningful for factors
3: In Ops.factor(yorig, ret$fitted) : ‘-’ not meaningful for factors
4: In Ops.factor(yorig, ret$fitted) : ‘-’ not meaningful for factors
#確定模型精度變量的列名和行名
#可以看到C-classification與radial的組合誤判數最小,爲4
> dimnames(accuracy)=list(type,kernel)
> accuracy
linear polynomial radial sigmoid
C-classification 5 7 4 17
nu-classification 5 14 5 12
one-classification 102 75 76 75
> table(pred[,1,3],y)
y
setosa versicolor virginica
1 50 0 0
2 0 48 2
3 0 2 48
> plot(cmdscale(dist(iris[,-5])),
+ col=c("lightgray","black","gray")[as.integer(iris[,5])],
+ pch=c("o","+")[1:150 %in% model$index+1])#繪製模型分類散點圖
> legend(2,-0.4,c("setosa","versicolor","virginica"),
+ col=c("lightgray","black","gray"),lty=1)#標記圖例
#優化建模
> data(iris)
> wts=c(1,1,1)#確定模型的各個類別比重爲1:1:1
> names(wts)=c("setosa","versicolor","virginica")#確定各個比重對應的類別
> model1=svm(x,y,class.weights=wts)#建立模型
#確定模型的各個類別比重爲1:100:100
> wts=c(1,100,100)
> names(wts)=c("setosa","versicolor","virginica")#確定各個比重對應的類別
> model2=svm(x,y,class.weights=wts)
> pred2=predict(model2,x)#根據模型進行預測
> table(pred2,y)#展示預測結果
y
pred2 setosa versicolor virginica
setosa 50 0 0
versicolor 0 49 1
virginica 0 1 49
#確定模型各個類別的比重1:500:500
> wts=c(1,500,500)
> names(wts)=c("setosa","versicolor","virginica")#確定各個比重對應的類別
> model3=svm(x,y,class.weights =wts)
> pred3=predict(model3,x)
> table(pred3,y)#在實際構建模型時可以改變各類樣本的權重比例來提高模型預測精度
y
pred3 setosa versicolor virginica
setosa 50 0 0
versicolor 0 50 0
virginica 0 0 50