西瓜書習題3.3,3.4 Based on TensorFlow

3.3 西瓜數據集 Logistic 迴歸分類



Density Sugar Quality
0.697 0.46 1
0.774 0.376 1
0.634 0.264 1
0.608 0.318 1
0.556 0.215 1
0.403 0.237 1
0.481 0.149 1
0.437 0.211 1
0.666 0.091 0
0.243 0.267 0
0.245 0.057 0
0.343 0.099 0
0.639 0.161 0
0.657 0.198 0
0.36 0.37 0
0.593 0.042 0
0.719 0.103 0


import tensorflow as tf
import xlrd
import math
import matplotlib.pyplot as plt
import numpy as np

# Exercise 3.3: fit a two-feature logistic regression on the watermelon data
# and plot the samples together with the learned decision boundary.

# Read the three columns of the first sheet. Judging by the variable names
# these are density, sugar content and the 0/1 quality label.
# NOTE(review): col_values() returns every row of the sheet; if the workbook
# has a header row it must be removed before training - confirm against the file.
data = xlrd.open_workbook('./wmdata.xlsx')
sheet = data.sheet_by_index(0)
Den = sheet.col_values(0)
Sug = sheet.col_values(1)
Res = sheet.col_values(2)

# train data
Train_X = np.transpose(np.array([Den, Sug]))  # n * 2
Train_Y = np.reshape(np.array(Res), (len(Res), 1))  # n * 1

# Logistic Regression
X = tf.placeholder(tf.float32, [None, 2])  # n * 2
Y = tf.placeholder(tf.float32, [None, 1])  # n * 1
w = tf.Variable(tf.zeros([2, 1]), name="weight")  # X * w + b = n * 1
b = tf.Variable(0.0, name="bias")
logits = tf.matmul(X, w) + b  # n * 1, shared by the loss and the prediction
# loss = \sum_{i=1}^{m} { - y_i * z_i + ln(1 + exp(z_i)) },  z_i = x_i * w + b
# (len(Train_Y) * reduce_mean turns the mean of the log term back into a sum)
loss = - tf.matmul(Y, logits, transpose_a=True) + len(Train_Y) * tf.reduce_mean(tf.log(1 + tf.exp(logits)))
# P(y = 1 | x) = 1 / (1 + exp(-z)). The sign matters: without the minus the
# expression is P(y = 0 | x), which contradicts the loss above.
predict = 1 / (1 + tf.exp(-logits))

# start
alpha = 0.1  # learning rate
train_op = tf.train.GradientDescentOptimizer(alpha).minimize(loss)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
feed = {X: Train_X, Y: Train_Y}
for i in range(3000):
    sess.run(train_op, feed_dict=feed)
    if i % 50 == 0:
        print(sess.run(loss, feed_dict=feed))
# Fetch the parameters after the last step so they reflect the final update.
w_v, b_v = sess.run([w, b])

sess.run(predict, feed_dict={X: Train_X})

# Positive samples as red '+', negative samples as blue '*'.
positive = Train_Y[:, 0] > 0
negative = Train_Y[:, 0] == 0
plt.plot(Train_X[positive, 0], Train_X[positive, 1], "+r")
plt.plot(Train_X[negative, 0], Train_X[negative, 1], "*b")
# Decision boundary: [x, y] * [w1; w2] + b = 0
#   =>  w1 * x + w2 * y + b = 0  =>  y = -(w1 / w2) * x - b / w2
plt.plot(Train_X[:, 0], -w_v[0] / w_v[1] * Train_X[:, 0] - b_v / w_v[1])


  • loss 函數好好寫,就不會出錯
  • tf.matmul 做的是一般的矩陣乘法;而 Mat1 * Mat2 是逐元素相乘,相當於 MATLAB 中的 Mat1 .* Mat2
  • 設輸入序列x_i是一個n維變量。Logistic迴歸裏面的wX+b, 其實是有n+1個參數需要求解。但實際上若要確定n維空間中的分界面(超平面),只需要n個參數就夠了。so多求的一個參數的作用是什麼? 不是很理解




3.4 UCI數據集

選擇Iris數據集,轉成txt,實現Logistic迴歸。該數據集有3個類別,4個自變量(屬性)。考慮使用OvR(One versus Rest)法進行分類。


import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import math
import random

def readTxtData(Filename):
    """Read an Iris-style comma-separated text file.

    Each usable line has four numeric attributes followed by the species name,
    e.g. ``5.1,3.5,1.4,0.2,Iris-setosa``.

    Args:
        Filename: path of the text file to read.

    Returns:
        ``(Train_X, Train_Y)``: ``Train_X`` is a list of 4-element float lists
        and ``Train_Y`` is the list of integer class ids, in file order.

    Lines with fewer than five fields (such as a trailing blank line) or with
    an unrecognised species name are skipped.
    """
    # NOTE(review): the numeric ids are reconstructed from how the rest of the
    # script uses labels (classes 0, 1, 2 in one-vs-rest order) - confirm the
    # species-to-id mapping if results must match an earlier run.
    class_ids = {"Iris-setosa": 0, "Iris-versicolor": 1, "Iris-virginica": 2}
    Train_X = []
    Train_Y = []
    with open(Filename, "r") as f:
        for line in f:
            iris = line.strip().split(",")
            if len(iris) < 5 or iris[4] not in class_ids:
                continue
            Train_X.append([float(attr) for attr in iris[0:4]])  # fields 0~3
            Train_Y.append(class_ids[iris[4]])
    return Train_X, Train_Y

def swap(a, b):
    """Return the two arguments in reversed order as the tuple ``(b, a)``."""
    reversed_pair = (b, a)
    return reversed_pair

def randomSort(X, Y):
    """Shuffle ``X`` and ``Y`` in place using one shared random permutation.

    The same index swaps are applied to both sequences, so ``X[k]`` and
    ``Y[k]`` still belong together afterwards.

    Args:
        X: mutable sequence of samples (a Python list in this script).
        Y: mutable sequence of labels, indexed in parallel with ``X``.

    Returns:
        The same ``X`` and ``Y`` objects, now shuffled.

    Uses a Fisher-Yates shuffle: every position is swapped once with a random
    earlier-or-equal position. Inputs of length 0 or 1 are returned unchanged,
    so there is no retry loop that could spin forever on a single element.
    """
    for last in range(len(X) - 1, 0, -1):
        pick = random.randint(0, last)  # inclusive on both ends
        X[last], X[pick] = X[pick], X[last]
        Y[last], Y[pick] = Y[pick], Y[last]
    return X, Y

def genTrainData(Train_X, Train_Y, i):# generate i th set of data out of 10
    """Split the data into the ``i``-th of ten folds (test) and the rest (train).

    Args:
        Train_X: sequence of samples.
        Train_Y: sequence of labels, parallel to ``Train_X``.
        i: fold index, 0..9.

    Returns:
        ``(TF_Train_X, TF_Train_Y, TF_Test_X, TF_Test_Y)``. The test parts are
        plain slices of the inputs. The train parts are built with
        ``np.append`` without an axis, so they come back as flattened 1-D
        arrays and the caller has to reshape ``TF_Train_X`` itself.

    The fold size is ``len(Train_Y) // 10``; any remainder samples at the end
    never land in a test fold and are always part of the training split.
    """
    Num = len(Train_Y) # number of training data
    Span = Num // 10
    start, stop = i * Span, (i + 1) * Span
    TF_Test_X = Train_X[start:stop]
    TF_Test_Y = Train_Y[start:stop]
    # Slice to the end of the data: an upper bound of -1 would silently drop
    # the last sample from every training split.
    TF_Train_X = np.append(Train_X[:start], Train_X[stop:])
    TF_Train_Y = np.append(Train_Y[:start], Train_Y[stop:])
    return TF_Train_X, TF_Train_Y, TF_Test_X, TF_Test_Y

def genTrainY(temp_Y, j):
    """Build the one-vs-rest target column for class ``j``.

    Args:
        temp_Y: sequence of class ids.
        j: the class treated as positive.

    Returns:
        An ``n * 1`` NumPy array holding 1 where ``temp_Y[k] == j`` and 0
        elsewhere.
    """
    temp = [1 if label == j else 0 for label in temp_Y]
    return np.reshape(np.array(temp), (len(temp), 1))

def runLogistic(Train_X, Train_Y, num):
    """Train one binary logistic-regression classifier with gradient descent.

    Args:
        Train_X: ``n * 4`` array of samples.
        Train_Y: ``n * 1`` array of 0/1 targets.
        num: number of training samples; the label term of the loss is
            divided by it so the whole loss is a per-sample mean.

    Returns:
        ``(w_v, b_v)``: the learned ``4 * 1`` weight array and the scalar bias
        after 500 gradient-descent steps with learning rate 0.05.

    The loss is printed every 100 steps.
    """
    # Each call gets its own graph, and the session is closed on exit, so
    # repeated calls do not keep adding nodes to the default graph or leave
    # sessions open.
    with tf.Graph().as_default():
        X = tf.placeholder(tf.float64, [None, 4]) # n * 4
        Y = tf.placeholder(tf.float64, [None, 1]) # n * 1
        w = tf.Variable(tf.zeros([4, 1], dtype=tf.float64), name="weight", dtype=tf.float64) #  X * w + b = n * 1
        b = tf.Variable(0.0, name="bias", dtype=tf.float64)
        logits = tf.matmul(X, w) + b
        # The cost is written as \sum{...} / num (a mean) rather than a plain
        # sum; with the plain sum the optimization may fail to converge.
        loss = - tf.matmul(Y, logits, transpose_a=True) / num + tf.reduce_mean(tf.log(1 + tf.exp(logits)))
        alpha = 0.05 # learning rate
        train_op = tf.train.GradientDescentOptimizer(float(alpha)).minimize(loss)

        init = tf.global_variables_initializer()
        feed = {X: Train_X, Y: Train_Y}
        with tf.Session() as sess:
            sess.run(init)
            for k in range(500):
                sess.run(train_op, feed_dict=feed)
                if k % 100 == 0:
                    print(k, "th loss is", sess.run(loss, feed_dict=feed))
            # Read the parameters after the last step so they reflect the
            # final update.
            w_v, b_v = sess.run([w, b])

    return w_v, b_v

def runTest(Test_X, Test_Y, W, B, classNum):
    """Return the one-vs-rest error rate on a test set.

    Args:
        Test_X: ``n * d`` samples (nested list or array).
        Test_Y: ``n`` true class ids, where id ``c`` corresponds to the
            classifier ``W[c]``, ``B[c]``.
        W: per-class weight arrays, each ``d * 1`` (or length ``d``).
        B: per-class scalar biases.
        classNum: number of classifiers to evaluate (``W[0..classNum-1]``).

    Returns:
        Fraction of samples whose highest-scoring class differs from the label.

    Raises:
        ValueError: if ``Test_Y`` is empty.
    """
    if len(Test_Y) == 0:
        raise ValueError("Test_Y is empty; cannot compute an error rate")

    samples = np.array(Test_X)  # use the argument, not a module-level global
    scores = []
    for i in range(classNum):
        # The sigmoid is strictly increasing, so comparing the raw scores
        # x * w + b picks the same class as comparing probabilities, without
        # exp() overflow or saturation ties.
        scores.append(np.reshape(samples.dot(W[i]) + B[i], -1))
    scores = np.array(scores)  # classNum * n

    # For each sample take the class with the largest score (first on ties).
    predicted = np.argmax(scores, axis=0)
    faul = int(np.sum(predicted != np.reshape(np.array(Test_Y), -1)))

    return faul / len(Test_Y)

# main
# Exercise 3.4: 10-fold cross-validation of one-vs-rest logistic regression
# on the Iris data (4 attributes, 3 classes).
Train_X, Train_Y = readTxtData("Iris.txt")
Train_X, Train_Y = randomSort(Train_X, Train_Y)

Res = []  # error rate of every fold
# Ten Fold
for i in range(10):
    # Divide training data and testing data
    W = []
    B = []
    temp_X, temp_Y, TF_Test_X, TF_Test_Y = genTrainData(Train_X, Train_Y, i)
    # genTrainData returns the training samples flattened; restore n * 4.
    TF_Train_X = np.reshape(np.array(temp_X), (-1, 4)) # 4 attributes
    for j in range(3):
        # OvR
        TF_Train_Y = genTrainY(temp_Y, j) # if temp_Y == j, set 1
        w_v, b_v = runLogistic(TF_Train_X, TF_Train_Y, len(TF_Train_Y))
        # Keep every class's classifier; runTest indexes W and B by class id.
        W.append(w_v)
        B.append(b_v)
    # test
    res = runTest(TF_Test_X, TF_Test_Y, W, B, 3)
    Res.append(res)

print("mean error rate over 10 folds:", np.mean(Res))


另外,還有一個需要注意的小地方:寫代價函數的時候最好寫成平均值 $\frac{1}{m}\sum_{i=1}^{m}\ell_i$ 的形式,不要直接寫成總和 $\sum_{i=1}^{m}\ell_i$ 的形式,不然不好收斂(總和形式的梯度會隨樣本數 $m$ 放大,相當於把學習率放大了 $m$ 倍)。

還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.