寫在前面: 我是「虐貓人薛定諤i」,一個不滿足於現狀,有夢想,有追求的00後
本博客主要記錄和分享自己畢生所學的知識,歡迎關注,第一時間獲取更新。
不忘初心,方得始終。
❤❤❤❤❤❤❤❤❤❤
數據
該數據集統計了13種可能影響房價的因素和該類型房屋的均價,我們期望構建一個基於13個因素進行房價預測的模型。
代碼
import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph
from paddle.fluid.dygraph import Linear
import numpy as np
def load_data(datafile='./res/housing.data', ratio=0.8):
    """Load the Boston housing dataset and split it into train/test sets.

    The raw file is a whitespace-separated stream of floats; every 14
    consecutive values form one record (13 features + the MEDV target).

    Args:
        datafile: path to the raw housing data file.
        ratio: fraction of records used for training; the split is taken
            from the head of the file.

    Returns:
        (training_data, test_data): float ndarrays of shape (n, 14),
        normalized column-wise with statistics of the training split.

    Side effects:
        Sets module-level globals max_values / min_values / avg_values so
        that later inference code can de-normalize predictions.
    """
    data = np.fromfile(datafile, sep=' ')
    feature_names = [
        'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
        'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV'
    ]
    feature_num = len(feature_names)
    # Reshape the flat value stream into one record per row.
    data = data.reshape([data.shape[0] // feature_num, feature_num])
    offset = int(data.shape[0] * ratio)
    training_data = data[:offset]
    # Normalization statistics come from the training split only, then are
    # applied to the whole dataset (avoids test-set leakage).
    maximums = training_data.max(axis=0)
    minimums = training_data.min(axis=0)
    avgs = training_data.sum(axis=0) / training_data.shape[0]
    global max_values, min_values, avg_values
    max_values = maximums
    min_values = minimums
    avg_values = avgs
    for i in range(feature_num):
        data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i])
    training_data = data[:offset]
    test_data = data[offset:]
    return training_data, test_data
class Regressor(fluid.dygraph.Layer):
    """Linear-regression model: 13 housing features -> 1 predicted price."""

    def __init__(self, name_scope):
        super(Regressor, self).__init__(name_scope)
        # Removed the original dead assignment `name_scope = self.full_name()`
        # (it rebound a local that was never read).
        # A single FC layer with no activation is plain linear regression.
        self.fc = Linear(input_dim=13, output_dim=1, act=None)

    def forward(self, inputs):
        """Forward pass; inputs is expected to be a (batch, 13) variable."""
        return self.fc(inputs)
# Build the model and its optimizer inside a dygraph context.
with fluid.dygraph.guard():
    model = Regressor("Regressor")
    # Put the model in training mode before fitting.
    model.train()
    training_data, test_data = load_data()
    # Plain SGD over every trainable parameter of the model.
    opt = fluid.optimizer.SGD(
        learning_rate=0.01, parameter_list=model.parameters())
# Mini-batch SGD training loop on CPU.
with dygraph.guard(fluid.CPUPlace()):
    EPOCH_NUM = 10
    BATCH_SIZE = 10
    for epoch_id in range(EPOCH_NUM):
        # Reshuffle every epoch so batch composition differs between epochs.
        np.random.shuffle(training_data)
        starts = range(0, len(training_data), BATCH_SIZE)
        mini_batches = [training_data[s:s + BATCH_SIZE] for s in starts]
        for iter_id, batch in enumerate(mini_batches):
            # Each row holds 13 features followed by the price label.
            features_np = batch[:, :-1].astype('float32')
            labels_np = batch[:, -1:].astype('float32')
            house_features = dygraph.to_variable(features_np)
            prices = dygraph.to_variable(labels_np)
            predicts = model(house_features)
            loss = fluid.layers.square_error_cost(predicts, label=prices)
            avg_loss = fluid.layers.mean(loss)
            # Log progress every 20 iterations.
            if iter_id % 20 == 0:
                print("epoch: {}, iter: {}, loss is: {}".format(
                    epoch_id, iter_id, avg_loss.numpy()))
            avg_loss.backward()
            opt.minimize(avg_loss)
            model.clear_gradients()
    # Persist the trained parameters for the inference step below.
    fluid.save_dygraph(model.state_dict(), './result/LR_model')
def load_one_example(data_dir):
    """Read one record from the raw data file for inference.

    Picks the 10th line from the end of the file, normalizes its 13
    features with the statistics stored as globals by load_data(), and
    returns the features alongside the raw (un-normalized) label.

    Args:
        data_dir: path to the raw housing data file, one record per line.

    Returns:
        (data, label): data is a float32 ndarray of shape (1, 13); label
        is the raw MEDV value as a float.
    """
    # Use a context manager so the file handle is always closed
    # (the original version opened the file and never closed it).
    with open(data_dir, 'r') as f:
        lines = f.readlines()
    # An arbitrary sample taken from near the end of the file.
    tmp = lines[-10].strip().split()
    one_data = [float(v) for v in tmp]
    # Normalize the features only; the label keeps its raw scale.
    for i in range(len(one_data) - 1):
        one_data[i] = (one_data[i] - avg_values[i]) / (
            max_values[i] - min_values[i])
    data = np.reshape(np.array(one_data[:-1]), [1, -1]).astype(np.float32)
    label = one_data[-1]
    return data, label
# Inference: restore the saved parameters and predict one sample.
with dygraph.guard():
    model_dict, _ = fluid.load_dygraph('./result/LR_model')
    model.load_dict(model_dict)
    # Evaluation mode: no gradients / training-only behavior.
    model.eval()
    test_data, label = load_one_example("./res/housing.data")
    test_data = dygraph.to_variable(test_data)
    results = model(test_data)
    # De-normalize the prediction back to the original price scale.
    results = results * (max_values[-1] - min_values[-1]) + avg_values[-1]
    print("Inference result is {}, the corresponding label is {}".format(
        results.numpy(), label))
結果
epoch: 0, iter: 0, loss is: [0.12414427]
epoch: 0, iter: 20, loss is: [0.09664079]
epoch: 0, iter: 40, loss is: [0.09450926]
epoch: 1, iter: 0, loss is: [0.1741528]
epoch: 1, iter: 20, loss is: [0.05529367]
epoch: 1, iter: 40, loss is: [0.01332193]
epoch: 2, iter: 0, loss is: [0.16167393]
epoch: 2, iter: 20, loss is: [0.07180402]
epoch: 2, iter: 40, loss is: [0.13977303]
epoch: 3, iter: 0, loss is: [0.19331458]
epoch: 3, iter: 20, loss is: [0.0456266]
epoch: 3, iter: 40, loss is: [0.10872958]
epoch: 4, iter: 0, loss is: [0.07973732]
epoch: 4, iter: 20, loss is: [0.04598405]
epoch: 4, iter: 40, loss is: [0.02657759]
epoch: 5, iter: 0, loss is: [0.03885783]
epoch: 5, iter: 20, loss is: [0.0659483]
epoch: 5, iter: 40, loss is: [0.14601612]
epoch: 6, iter: 0, loss is: [0.06403525]
epoch: 6, iter: 20, loss is: [0.04037663]
epoch: 6, iter: 40, loss is: [0.11558133]
epoch: 7, iter: 0, loss is: [0.02554102]
epoch: 7, iter: 20, loss is: [0.0302988]
epoch: 7, iter: 40, loss is: [0.09855933]
epoch: 8, iter: 0, loss is: [0.03654612]
epoch: 8, iter: 20, loss is: [0.04106348]
epoch: 8, iter: 40, loss is: [0.03786435]
epoch: 9, iter: 0, loss is: [0.0272223]
epoch: 9, iter: 20, loss is: [0.0438651]
epoch: 9, iter: 40, loss is: [0.02117896]
Inference result is [[22.327892]], the corresponding label is 19.7
總結
比起使用numpy編寫,使用paddle編寫可以節省很多時間,代碼量也相對較少。
蒟蒻寫博客不易,加之本人水平有限,寫作倉促,錯誤和不足之處在所難免,謹請讀者和各位大佬們批評指正。
如需轉載,請署名作者並附上原文鏈接,蒟蒻非常感激
名稱:虐貓人薛定諤i
博客地址:https://blog.csdn.net/Deep___Learning