利用三次多項式對波士頓房價數據進行房價預測:Python 實現 Boston Housing Price 預測

先不囉嗦,上代碼:

import numpy as np

# --- Load the data set -------------------------------------------------------
# Columns 0..12 of the CSV are read as input features and column 13 as the
# price target. The file is expected to contain numeric rows only.
col = list(range(13))
features = np.loadtxt('Boston-Housing-Price.csv', delimiter=',', usecols=col)
prices = np.loadtxt('Boston-Housing-Price.csv', delimiter=',', usecols=13)

# --- Min-max normalisation ---------------------------------------------------
# Each feature column is rescaled with its own minimum and maximum.
z_min = features.min(axis=0)
z_max = features.max(axis=0)
features = (features - z_min) / (z_max - z_min)

# The target is rescaled the same way; z_min / z_max are reused and therefore
# hold the price minimum / maximum from here on.
z_min = prices.min(axis=0)
z_max = prices.max(axis=0)
prices = (prices - z_min) / (z_max - z_min)

# --- Random train / validation / test split ----------------------------------
# Shuffle the row indices in place, then cut the shuffled order into three
# consecutive ranges: [0, 270), [270, 360) and [360, end).
data_index = np.arange(len(features))
np.random.shuffle(data_index)

train_rows = data_index[:270]
validation_rows = data_index[270:360]
test_rows = data_index[360:]

train_input, train_target = features[train_rows], prices[train_rows]
validation_input, validation_target = features[validation_rows], prices[validation_rows]
test_input, test_target = features[test_rows], prices[test_rows]

# --- Mini-batch gradient descent for a per-feature cubic polynomial ----------
# Model for a batch X (rows = samples, columns = features):
#     pred = ones @ params[:, 0] + X @ params[:, 1]
#            + X**2 @ params[:, 2] + X**3 @ params[:, 3]
# i.e. column k of `params` holds the degree-k coefficient of every feature.
batch_size = 10
n_features = train_input.shape[1]
# Only full batches are used; a trailing partial batch is ignored.
n_batches = train_input.shape[0] // batch_size

one_mat = np.ones((batch_size, n_features))
train_input_ori = train_input
train_input_square = pow(train_input, 2)
train_input_cubed = pow(train_input, 3)

lr = 0.001

params = np.random.random((n_features, 4))
for j in range(50):  # 50 passes over the training data
    for i in range(n_batches):
        batch = slice(batch_size * i, batch_size * (i + 1))

        pred_prices = np.dot(one_mat, params[:, 0]) \
                      + np.dot(train_input_ori[batch], params[:, 1]) \
                      + np.dot(train_input_square[batch], params[:, 2]) \
                      + np.dot(train_input_cubed[batch], params[:, 3])

        # `loss` is the per-sample residual (prediction minus target);
        # `los_value` is the batch mean of half the squared residual.
        loss = pred_prices - train_target[batch]
        loss_square = 0.5 * pow(loss, 2)
        los_value = np.sum(loss_square) / loss_square.shape[0]
        print(los_value)

        # Gradient of the mean half-squared error w.r.t. each coefficient
        # column. np.dot(residual, basis) already yields one value per
        # feature, so it must NOT be summed again: collapsing it to a scalar
        # would apply the same step to every feature's coefficient.
        delta0 = np.dot(loss, one_mat) / batch_size
        delta1 = np.dot(loss, train_input_ori[batch]) / batch_size
        delta2 = np.dot(loss, train_input_square[batch]) / batch_size
        delta3 = np.dot(loss, train_input_cubed[batch]) / batch_size

        params[:, 0] = params[:, 0] - lr * delta0
        params[:, 1] = params[:, 1] - lr * delta1
        params[:, 2] = params[:, 2] - lr * delta2
        params[:, 3] = params[:, 3] - lr * delta3


## valid
# Evaluate the fitted cubic model on the validation split and print the mean
# of half the squared residual. The all-ones matrix takes its shape from the
# validation data so the evaluation does not depend on a fixed row count.
one_mat = np.ones(validation_input.shape)
valid_prices = np.dot(one_mat, params[:, 0]) \
               + np.dot(validation_input, params[:, 1]) \
               + np.dot(pow(validation_input, 2), params[:, 2]) \
               + np.dot(pow(validation_input, 3), params[:, 3])
valid_loss = valid_prices - validation_target  # per-sample residual
valid_loss_square = 0.5 * pow(valid_loss, 2)
valid_los_value = np.sum(valid_loss_square) / valid_loss_square.shape[0]
print(valid_los_value)

## test
# Evaluate the fitted cubic model on the test split and print the mean of half
# the squared residual. The all-ones matrix takes its shape from the test data
# (the test split is "everything after row 360", so its size depends on the
# file), and the last term uses the cube of the inputs so the model evaluated
# here is the same one that was trained and validated.
one_mat = np.ones(test_input.shape)
test_prices = np.dot(one_mat, params[:, 0]) \
               + np.dot(test_input, params[:, 1]) \
               + np.dot(pow(test_input, 2), params[:, 2]) \
               + np.dot(pow(test_input, 3), params[:, 3])
test_loss = test_prices - test_target  # per-sample residual
test_loss_square = 0.5 * pow(test_loss, 2)
test_los_value = np.sum(test_loss_square) / test_loss_square.shape[0]
print(test_los_value)

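     好像也沒啥需要講的:我用的是三次多項式進行預測,也就是對 13 個特徵各自擬合常數項、一次項、二次項、三次項共四個係數,再把所有特徵的多項式值相加作爲預測房價;訓練採用小批量梯度下降(每批 10 筆、學習率 0.001、共 50 輪),數據隨機打亂後前 270 筆作訓練集、接下來 90 筆作驗證集、其餘作測試集。腦殼痛,不想寫了,其實就是懶,哈哈哈哈。如果有問題可以給我留言,知無不言,言無不盡。另外,爲了方便用 np.loadtxt 讀取,我將 .csv 文件中第一行的屬性名稱(表頭)刪除了。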

    波士頓房價預測數據集鏈接爲:鏈接: https://pan.baidu.com/s/1rfLfaKM4PWbmozPLeiB1eA  密碼: 794l

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章