使用paddle實現波士頓房價預測(線性迴歸模型)

寫在前面: 我是「虐貓人薛定諤i」,一個不滿足於現狀,有夢想,有追求的00後
本博客主要記錄和分享自己畢生所學的知識,歡迎關注,第一時間獲取更新。

不忘初心,方得始終。

❤❤❤❤❤❤❤❤❤❤

數據

該數據集統計了13種可能影響房價的因素和該類型房屋的均價,我們期望構建一個基於13個因素進行房價預測的模型。
(此處原文附有數據集13個特徵字段說明的截圖)

代碼

import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph
from paddle.fluid.dygraph import Linear
import numpy as np


def load_data(datafile='./res/housing.data', ratio=0.8):
    """Load the Boston housing dataset, normalize it, and split train/test.

    Args:
        datafile: path to the whitespace-separated housing data file
            (defaults to the original hard-coded location).
        ratio: fraction of samples used for the training split.

    Returns:
        (training_data, test_data): two float ndarrays of shape (n, 14),
        where the first 13 columns are features and the last is the
        MEDV label (also normalized; denormalized again at inference).

    Side effects:
        Sets module-level globals max_values / min_values / avg_values
        (statistics of the *training* portion only) that
        load_one_example() and the inference code use later.
    """
    # The raw file is a flat sequence of floats; reshape into rows of 14.
    data = np.fromfile(datafile, sep=' ')
    feature_names = [
        'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
        'PTRATIO', 'B', 'LSTAT', 'MEDV'
    ]
    feature_num = len(feature_names)
    data = data.reshape([data.shape[0] // feature_num, feature_num])

    # Compute normalization statistics on the training split only, so the
    # test split never leaks into them.
    offset = int(data.shape[0] * ratio)
    training_data = data[:offset]
    maximums = training_data.max(axis=0)
    minimums = training_data.min(axis=0)
    avgs = training_data.mean(axis=0)

    global max_values
    global min_values
    global avg_values
    max_values = maximums
    min_values = minimums
    avg_values = avgs

    # Mean-center and range-scale every column in one vectorized step
    # (equivalent to the per-column loop, via numpy broadcasting).
    data = (data - avgs) / (maximums - minimums)
    training_data = data[:offset]
    test_data = data[offset:]
    return training_data, test_data


class Regressor(fluid.dygraph.Layer):
    """Linear-regression model: one fully connected layer mapping the
    13 housing features to a single predicted price (no activation)."""

    def __init__(self, name_scope):
        super(Regressor, self).__init__(name_scope)
        # Bug fix: removed the dead `name_scope = self.full_name()`
        # assignment — the local was written and never read.
        # 13 inputs -> 1 output; act=None keeps the layer purely linear.
        self.fc = Linear(input_dim=13, output_dim=1, act=None)

    def forward(self, inputs):
        """Forward pass: inputs of shape (batch, 13) -> (batch, 1)."""
        return self.fc(inputs)


# Build the model and optimizer inside a dygraph (imperative-mode) guard.
with fluid.dygraph.guard():
    model = Regressor("Regressor")
    model.train()  # switch the model to training mode
    training_data, test_data = load_data()
    # Plain SGD over all trainable parameters of the model.
    opt = fluid.optimizer.SGD(learning_rate=0.01,
                              parameter_list=model.parameters())

# Training loop. NOTE(review): this runs in a *separate* guard from the
# setup above; it relies on the model/optimizer objects persisting across
# guards — merging the two guards into one would be the safer pattern.
with dygraph.guard(fluid.CPUPlace()):
    EPOCH_NUM = 10   # passes over the whole training set
    BATCH_SIZE = 10  # samples per mini-batch

    for epoch_id in range(EPOCH_NUM):
        # Reshuffle in place every epoch, then slice into mini-batches.
        np.random.shuffle(training_data)
        mini_batches = [
            training_data[k:k + BATCH_SIZE]
            for k in range(0, len(training_data), BATCH_SIZE)
        ]

        for iter_id, mini_batch in enumerate(mini_batches):
            # First 13 columns are features, last column is the label.
            x = np.array(mini_batch[:, :-1]).astype('float32')
            y = np.array(mini_batch[:, -1:]).astype('float32')
            house_features = dygraph.to_variable(x)
            prices = dygraph.to_variable(y)

            predicts = model(house_features)

            # Mean squared error between predictions and true prices.
            loss = fluid.layers.square_error_cost(predicts, label=prices)
            avg_loss = fluid.layers.mean(loss)
            if iter_id % 20 == 0:
                print("epoch: {}, iter: {}, loss is: {}".format(
                    epoch_id, iter_id, avg_loss.numpy()))

            # Backprop, apply one optimizer step, then reset gradients.
            avg_loss.backward()
            opt.minimize(avg_loss)
            model.clear_gradients()

    # Persist the trained parameters for the inference step below.
    fluid.save_dygraph(model.state_dict(), './result/LR_model')


def load_one_example(data_dir):
    """Read one sample (the 10th line from the end) from the data file.

    The 13 feature values are normalized with the module-level statistics
    (avg_values / max_values / min_values) produced by load_data(), so
    load_data() must have run first. The label is returned un-normalized.

    Args:
        data_dir: path to the whitespace-separated housing data file.

    Returns:
        (data, label): data is a float32 ndarray of shape (1, 13);
        label is the raw MEDV value as a float.
    """
    # Bug fix: the original opened the file without ever closing it;
    # a context manager guarantees the handle is released.
    with open(data_dir, 'r') as f:
        datas = f.readlines()
    tmp = datas[-10]  # fixed demo sample: 10th line from the end
    one_data = [float(v) for v in tmp.strip().split()]

    # Normalize every feature (all but the trailing label) exactly as
    # load_data() normalized the training set.
    for i in range(len(one_data) - 1):
        one_data[i] = (one_data[i] - avg_values[i]) / (max_values[i] -
                                                       min_values[i])

    data = np.reshape(np.array(one_data[:-1]), [1, -1]).astype(np.float32)
    label = one_data[-1]
    return data, label


# Inference: restore the saved parameters and predict one sample.
with dygraph.guard():
    # Load the checkpoint written at the end of training.
    model_dict, _ = fluid.load_dygraph('./result/LR_model')
    model.load_dict(model_dict)
    model.eval()  # switch to evaluation mode (no gradients needed)

    test_data, label = load_one_example("./res/housing.data")
    test_data = dygraph.to_variable(test_data)
    results = model(test_data)

    # Undo the label normalization applied in load_data() so the
    # prediction is expressed in the original price units.
    results = results * (max_values[-1] - min_values[-1]) + avg_values[-1]
    print("Inference result is {}, the corresponding label is {}".format(
        results.numpy(), label))

(此處原文附有程序運行結果的截圖)

結果

epoch: 0, iter: 0, loss is: [0.12414427]
epoch: 0, iter: 20, loss is: [0.09664079]
epoch: 0, iter: 40, loss is: [0.09450926]
epoch: 1, iter: 0, loss is: [0.1741528]
epoch: 1, iter: 20, loss is: [0.05529367]
epoch: 1, iter: 40, loss is: [0.01332193]
epoch: 2, iter: 0, loss is: [0.16167393]
epoch: 2, iter: 20, loss is: [0.07180402]
epoch: 2, iter: 40, loss is: [0.13977303]
epoch: 3, iter: 0, loss is: [0.19331458]
epoch: 3, iter: 20, loss is: [0.0456266]
epoch: 3, iter: 40, loss is: [0.10872958]
epoch: 4, iter: 0, loss is: [0.07973732]
epoch: 4, iter: 20, loss is: [0.04598405]
epoch: 4, iter: 40, loss is: [0.02657759]
epoch: 5, iter: 0, loss is: [0.03885783]
epoch: 5, iter: 20, loss is: [0.0659483]
epoch: 5, iter: 40, loss is: [0.14601612]
epoch: 6, iter: 0, loss is: [0.06403525]
epoch: 6, iter: 20, loss is: [0.04037663]
epoch: 6, iter: 40, loss is: [0.11558133]
epoch: 7, iter: 0, loss is: [0.02554102]
epoch: 7, iter: 20, loss is: [0.0302988]
epoch: 7, iter: 40, loss is: [0.09855933]
epoch: 8, iter: 0, loss is: [0.03654612]
epoch: 8, iter: 20, loss is: [0.04106348]
epoch: 8, iter: 40, loss is: [0.03786435]
epoch: 9, iter: 0, loss is: [0.0272223]
epoch: 9, iter: 20, loss is: [0.0438651]
epoch: 9, iter: 40, loss is: [0.02117896]
Inference result is [[22.327892]], the corresponding label is 19.7

總結

比起使用numpy編寫,使用paddle編寫可以節省很多時間,代碼量也相對較少。

蒟蒻寫博客不易,加之本人水平有限,寫作倉促,錯誤和不足之處在所難免,謹請讀者和各位大佬們批評指正。
如需轉載,請署名作者並附上原文鏈接,蒟蒻非常感激
名稱:虐貓人薛定諤i
博客地址:https://blog.csdn.net/Deep___Learning

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章