import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn import svm
# Fit a one-hidden-layer ReLU network (with an L2 weight penalty) and an
# RBF-kernel SVR to the same noisy sine samples, then plot both fits.

# Training data: sorted x-values in [0, 5) with y = sin(x); every fifth target
# is perturbed by uniform noise drawn from (-1.5, 1.5].
num_samples = 100
X = np.sort(5 * np.random.rand(num_samples, 1), axis=0)
Y = np.sin(X).ravel()
Y[::5] += 3 * (0.5 - np.random.rand(Y[::5].size))
Y = Y.reshape((num_samples, 1))
plt.scatter(X, Y, color='m', edgecolor='k', alpha=0.5)

batch_size = 100
hidden_size = 20

x = tf.placeholder(tf.float32, shape=(None, 1))
y = tf.placeholder(tf.float32, shape=(None, 1))
w1 = tf.Variable(tf.random_uniform([1, hidden_size], 0, 1))
w2 = tf.Variable(tf.random_uniform([hidden_size, 1], 0, 1))
# One bias per unit, broadcast across the batch dimension. Shaping the biases
# as [batch_size, ...] would learn a separate offset for every sample position
# and would only accept batches of exactly batch_size rows.
b1 = tf.Variable(tf.zeros([hidden_size]))
b2 = tf.Variable(tf.zeros([1]))

h_input = tf.matmul(x, w1) + b1
h_output = tf.nn.relu(h_input)
y_ = tf.matmul(h_output, w2) + b2

# Mean squared error plus an L2 penalty on both weight matrices.
loss = tf.reduce_mean(tf.square(y_ - y)) + 10 * (tf.nn.l2_loss(w1) + tf.nn.l2_loss(w2))
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

# Loss and predictions are always evaluated on the whole data set so the
# stored prediction has one value per row of X regardless of batch_size.
full_feed = {x: X, y: Y}

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    min_loss = np.inf
    for i in range(1000):
        start = (i * batch_size) % num_samples
        end = min(start + batch_size, num_samples)
        sess.run(train_step, feed_dict={x: X[start:end], y: Y[start:end]})
        # Evaluate after the update; keep the predictions of the best step.
        train_loss, y_pre = sess.run([loss, y_], feed_dict=full_feed)
        if train_loss < min_loss:
            min_loss = train_loss
            y_pre_last = y_pre

svr = svm.SVR(kernel='rbf', C=10, gamma=0.1, epsilon=.1)
# SVR expects a 1-D target vector, so flatten the column vector.
y_svr = svr.fit(X, Y.ravel()).predict(X)

# Both models were evaluated at the sample points X (already sorted), so the
# curves must be drawn against X rather than an unrelated evenly spaced grid.
x_plot = X.ravel()
plt.plot(x_plot, y_pre_last.ravel(), linewidth=2, label="neural network")
plt.plot(x_plot, y_svr, color='k', linewidth=2, label="support vector regression")
plt.legend()
plt.xlabel("x Axis")
plt.ylabel("y Axis")
plt.title("The model of the neural network")
plt.show()
即使在神經網絡的損失函數中加入 L2 正則項,最終的預測結果仍然容易過擬合;相比之下,同樣帶有 L2 正則項的 SVR 模型能較好地避免過擬合,預測結果也比神經網絡好得多。
如果在神經網絡中加入 dropout,則能有效避免過擬合。
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn import svm
# Fit a two-hidden-layer ReLU network with dropout and an RBF-kernel SVR to
# the same noisy sine samples, then plot both fits.

# Training data: sorted x-values in [0, 5) with y = sin(x); every fifth target
# is perturbed by uniform noise drawn from (-1.5, 1.5].
num_samples = 100
X = np.sort(5 * np.random.rand(num_samples, 1), axis=0)
Y = np.sin(X).ravel()
Y[::5] += 3 * (0.5 - np.random.rand(Y[::5].size))
Y = Y.reshape((num_samples, 1))
plt.scatter(X, Y, color='m', edgecolor='k', alpha=0.5)

batch_size = 100
hidden_size = 20

x = tf.placeholder(tf.float32, shape=(None, 1))
y = tf.placeholder(tf.float32, shape=(None, 1))
# tf.layers.dropout is an identity op unless training is true (its default is
# False), so the mode is exposed as a placeholder: fed True for the update
# step, left at the default False when evaluating loss and predictions.
is_training = tf.placeholder_with_default(False, shape=())

h1 = tf.layers.dense(x, hidden_size, tf.nn.relu)
h1 = tf.layers.dropout(h1, rate=0.7, training=is_training)
h2 = tf.layers.dense(h1, hidden_size, tf.nn.relu)
h2 = tf.layers.dropout(h2, rate=0.5, training=is_training)
y_ = tf.layers.dense(h2, 1)

loss = tf.reduce_mean(tf.square(y_ - y))
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

# Loss and predictions are always evaluated on the whole data set so the
# stored prediction has one value per row of X regardless of batch_size.
full_feed = {x: X, y: Y}

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    min_loss = np.inf
    for i in range(1000):
        start = (i * batch_size) % num_samples
        end = min(start + batch_size, num_samples)
        train_feed = {x: X[start:end], y: Y[start:end], is_training: True}
        sess.run(train_step, feed_dict=train_feed)
        # Evaluate after the update with dropout disabled; keep the
        # predictions of the step with the lowest loss.
        train_loss, y_pre = sess.run([loss, y_], feed_dict=full_feed)
        if train_loss < min_loss:
            min_loss = train_loss
            y_pre_last = y_pre

svr = svm.SVR(kernel='rbf', C=10, gamma=0.1, epsilon=.1)
# SVR expects a 1-D target vector, so flatten the column vector.
y_svr = svr.fit(X, Y.ravel()).predict(X)

# Both models were evaluated at the sample points X (already sorted), so the
# curves must be drawn against X rather than an unrelated evenly spaced grid.
x_plot = X.ravel()
plt.plot(x_plot, y_pre_last.ravel(), color='c', linewidth=3, label="neural network")
plt.plot(x_plot, y_svr, color='k', linewidth=2, label="support vector regression")
plt.legend()
plt.xlabel("x Axis")
plt.ylabel("y Axis")
plt.title("The model of the neural network")
plt.show()