支持向量機 smo算法

#coding=utf-8
import re
from numpy import *
def load_data():
    data=[];label=[]
    o=open('test.txt')
    for line in o.readlines():
        line_arr=re.split(r'(-\d*|\d*)',line.strip())
        data.append([1.0,float(int(line_arr[1])),float(int(line_arr[3]))])
        label.append(int(line_arr[5]))
    return data,label  
class self_var:
    """函數間傳遞變量用,省得定義全局變量"""
    def __init__(self,data_matrix,label_matrix,c,toler):
        self.x=data_matrix
        self.y=label_matrix
        self.c=c
        self.toler=toler
        self.m,self.n=shape(data_matrix)
        self.alphas=mat(zeros((self.m,1)))
        self.b=0
        self.ecache=mat(zeros((self.m,2)))
def select_jrand(i,m):
    j=i
    while j==i:
        j=int(random.uniform(0,m))
    return j
def clip_alpha(aj,h,l):
    if aj>h:
        aj=h
    if l>aj:
        aj=l
    return aj
def clac_ek(sv,k):
    fxk=float(multiply(sv.alphas,sv.y).T*(sv.x*sv.x[k,:].T))+sv.b
    ek=fxk-float(sv.y[k])
    return ek
def select_j(i,sv,ei):
    max_k=-1;max_delta_e=0;ej=0
    sv.ecache[i]=[1,ei]
    valid_ecache_list=nonzero(sv.ecache[:,0].A)[0]
    if len(valid_ecache_list)>1:
        for k in valid_ecache_list:
            if k==i:continue
            ek=clac_ek(sv,k)
            delta_e=abs(ei-ek)
            if delta_e>max_delta_e:
                max_k=k;max_delta_e=delta_e;ej=ek
        return max_k,ej
    else:
        j=select_jrand(i,sv.m)
        ej=clac_ek(sv,j)
    return j,ej
def update_ek(sv,k):
    ek=clac_ek(sv,k)
    sv.ecache[k]=[1,ek]
def inner_loop(i,sv):
    ei=clac_ek(sv,i)
    if ((sv.y[i]*ei<-sv.toler)and(sv.alphas[i]<sv.c)) or \
       ((sv.y[i]*ei>sv.toler)and(sv.alphas[i]>0)):
        j,ej=select_j(i,sv,ei)
        alpha_iold=sv.alphas[i].copy()
        alpha_jold=sv.alphas[j].copy()
        if sv.y[i] != sv.y[j]:
            l=max(0,sv.alphas[j]-sv.alphas[i])
            h=min(sv.c,sv.c+sv.alphas[j]-sv.alphas[i])
        else:
            l=max(0,sv.alphas[j]+sv.alphas[i]-sv.c)
            h=min(sv.c,sv.alphas[j]+sv.alphas[i])
        if l==h:print 'l==h';return 0
        eta=2.0*sv.x[i,:]*sv.x[j,:].T-sv.x[i,:]*sv.x[i,:].T-sv.x[j,:]*sv.x[j,:].T
        if eta>=0:print 'eta>=0';return 0
        sv.alphas[j]-=sv.y[j]*(ei-ej)/eta
        sv.alphas[j]=clip_alpha(sv.alphas[j],h,l)
        update_ek(sv,j)
        if abs(sv.alphas[j]-alpha_jold)<0.0001:
            print 'j not moving enough';return 0
        sv.alphas[i]+=sv.y[j]*sv.y[i]*(alpha_jold-sv.alphas[j])
        update_ek(sv,i)
        b1=sv.b-ei-sv.y[i]*(sv.alphas[i]-alpha_iold)*sv.x[i,:]*sv.x[i,:].T\
            -sv.y[j]*(sv.alphas[j]-alpha_jold)*sv.x[i,:]*sv.x[j,:].T
        b2=sv.b-ej-sv.y[i]*(sv.alphas[i]-alpha_iold)*sv.x[i,:]*sv.x[j,:].T\
            -sv.y[j]*(sv.alphas[j]-alpha_jold)*sv.x[j,:]*sv.x[j,:].T
        if 0<sv.alphas[i] and sv.c>sv.alphas[i]:sv.b=b1
        elif 0<sv.alphas[j] and sv.c>sv.alphas[j]:sv.b=b2
        else:sv.b=(b1+b2)/2.0
        return 1
    else:
        return 0
def smop(data_matrix,label_matrix,c,toler,max_iter,ktup=('lin',0)):
    sv=self_var(data_matrix,label_matrix.T,c,toler)
    ite=0
    entire_set=True;alpha_pairs_changed=0
    while ite<max_iter and (alpha_pairs_changed>0 or entire_set):
        alpha_pairs_changed=0
        if entire_set:
            for i in range(sv.m):
                alpha_pairs_changed+=inner_loop(i,sv)
            print 'fullset,iter:%d i:%d,pairs changed %d'%(ite,i,alpha_pairs_changed)
            ite+=1
        else:
            non_boundis=nonzero((sv.alphas.A>0)*(sv.alphas.A<c))[0]#
            for i in non_boundis:
                alpha_pairs_changed+=inner_loop(i,sv)
                print 'non-bound,iter:%d i:%d,paris changed %d'%(ite,i,alpha_pairs_changed)
            ite+=1
        if entire_set:entire_set=False
        elif alpha_pairs_changed==0:entire_set=True
        print 'iteration number: %d'%ite
    return sv.b,sv.alphas
def calc_ws(m,n,b,alphas,data_matrix,label_matrix):
    w=zeros((n,1))
    c1=0
    c2=0
    for i in range(m):
        w+=multiply(alphas[i]*label_matrix.T[i],data_matrix[i,:].T)
    for i in data_matrix:
        a=i*mat(w)+b#####abs(a)其實是樣本點到超平面的距離。。
        if a>0:
            print '1'
            c1+=1
        else:
            print '-1'
            c2+=1
    print c1,c2
def test():
    data,label=load_data()
    data_matrix=mat(data)
    label_matrix=mat(label)
    m,n=shape(data_matrix)
    b,alphas=smop(data_matrix,label_matrix,0.6,0.001,40)
    calc_ws(m,n,b,alphas,data_matrix,label_matrix)
###每次運行判斷結果都是不同的。
</pre><pre name="code" class="python">
test.txt:
[2, 4, '1']
[5, 8, '1']
[8, 5, '1']
[9, 9, '1']
[4, 1, '1']
[-1, -1, '1']
[5, 8, '1']
[9, 3, '1']
[1, 8, '1']
[7, 3, '1']
[9, 3, '1']
[3, 8, '1']
[4, 6, '1']
[9, 7, '1']
[7, 1, '1']
[5, 2, '1']
[9, 6, '1']
[6, 9, '1']
[9, 8, '1']
[7, -1, '1']
[4, 5, '1']
[9, 8, '1']
[-1, 4, '1']
[4, 3, '1']
[6, -1, '1']
[9, 9, '1']
[-1, 3, '1']
[9, 8, '1']
[1, 7, '1']
[5, 8, '1']
[7, 8, '1']
[1, 5, '1']
[-1, 7, '1']
[1, 9, '1']
[7, 8, '1']
[2, 5, '1']
[7, 4, '1']
[2, 1, '1']
[6, 1, '1']
[-1, 1, '1']
[2, 4, '1']
[6, -1, '1']
[8, -1, '1']
[4, 9, '1']
[8, 3, '1']
[9, 8, '1']
[8, 9, '1']
[5, 9, '1']
[9, 6, '1']
[4, 2, '1']
[8, 7, '1']
[1, 9, '1']
[3, 8, '1']
[-1, 1, '1']
[1, 1, '1']
[-1, 9, '1']
[-1, 6, '1']
[1, 5, '1']
[2, 6, '1']
[9, 5, '1']
[5, -1, '1']
[2, 4, '1']
[5, 9, '1']
[9, 5, '1']
[6, 3, '1']
[9, 3, '1']
[3, 6, '1']
[8, 6, '1']
[7, 7, '1']
[-1, -1, '1']
[5, 4, '1']
[2, 9, '1']
[5, 7, '1']
[3, 9, '1']
[6, 9, '1']
[8, 2, '1']
[8, 3, '1']
[8, -1, '1']
[2, 4, '1']
[9, 2, '1']
[-1, 3, '1']
[6, 8, '1']
[5, 4, '1']
[5, -1, '1']
[5, 3, '1']
[7, 6, '1']
[-1, 4, '1']
[3, 9, '1']
[7, 5, '1']
[8, 3, '1']
[9, 7, '1']
[8, 3, '1']
[3, 5, '1']
[2, 6, '1']
[1, 9, '1']
[6, 2, '1']
[3, 5, '1']
[9, 7, '1']
[5, 6, '1']
[7, 2, '1']
[11, -1, '-1']
[13, 7, '-1']
[16, 5, '-1']
[11, -1, '-1']
[17, 6, '-1']
[9, 5, '-1']
[15, 1, '-1']
[13, 7, '-1']
[12, 6, '-1']
[9, 5, '-1']
[17, 4, '-1']
[10, 8, '-1']
[10, 8, '-1']
[9, 5, '-1']
[13, 2, '-1']
[13, 6, '-1']
[9, -1, '-1']
[11, 9, '-1']
[17, 2, '-1']
[9, 7, '-1']
[16, 4, '-1']
[12, 1, '-1']
[11, 8, '-1']
[10, 1, '-1']
[17, 7, '-1']
[12, -1, '-1']
[16, 5, '-1']
[18, 2, '-1']
[15, 6, '-1']
[9, 5, '-1']
[13, 4, '-1']
[13, 2, '-1']
[11, 2, '-1']
[17, 7, '-1']
[16, 1, '-1']
[15, -1, '-1']
[9, 4, '-1']
[16, 7, '-1']
[13, 1, '-1']
[17, -1, '-1']
[18, 4, '-1']
[12, 3, '-1']
[11, 7, '-1']
[14, 6, '-1']
[9, 5, '-1']
[11, 9, '-1']
[12, 4, '-1']
[17, 8, '-1']
[11, 2, '-1']
[12, 5, '-1']
[13, -1, '-1']
[12, 2, '-1']
[11, 1, '-1']
[14, 1, '-1']
[17, -1, '-1']
[18, 3, '-1']
[11, 5, '-1']
[18, 2, '-1']
[12, 4, '-1']
[15, 8, '-1']
[17, 9, '-1']
[18, 5, '-1']
[14, 9, '-1']
[16, 9, '-1']
[18, 5, '-1']
[9, 1, '-1']
[14, 4, '-1']
[13, 2, '-1']
[12, 9, '-1']
[16, 8, '-1']
[15, 4, '-1']
[12, -1, '-1']
[16, 9, '-1']
[14, 3, '-1']
[12, 9, '-1']
[17, 5, '-1']
[11, 4, '-1']
[13, 6, '-1']
[16, 3, '-1']
[16, 2, '-1']
[11, 3, '-1']
[11, 1, '-1']
[17, 9, '-1']
[18, 2, '-1']
[11, 8, '-1']
[14, 3, '-1']
[11, -1, '-1']
[18, 6, '-1']
[12, 6, '-1']
[11, -1, '-1']
[14, -1, '-1']
[16, 5, '-1']
[12, 7, '-1']
[15, -1, '-1']
[15, 1, '-1']
[18, 9, '-1']
[9, -1, '-1']
[18, -1, '-1']
[18, 6, '-1']
[9, 3, '-1']

test2,txt:

[1, 2, 1]
[3, 2, -1]
[1, 3, 1]
[3, 4, -1]
[1, 6, 1]
[3, 8, -1]

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章