先不囉嗦,上代碼:
import numpy as np
# Load the dataset: columns 0-12 are read as the input features and
# column 13 as the price to predict. The CSV is expected to contain only
# numeric rows.
csv_path = 'Boston-Housing-Price.csv'
col = list(range(13))
features = np.loadtxt(csv_path, delimiter=',', usecols=col)
prices = np.loadtxt(csv_path, delimiter=',', usecols=13)

# Min-max scale every feature column independently.
z_min = features.min(axis=0)
z_max = features.max(axis=0)
feature_span = z_max - z_min
features = (features - z_min) / feature_span

# Min-max scale the target the same way; z_min / z_max now describe the
# price range rather than the feature ranges.
z_min = prices.min(axis=0)
z_max = prices.max(axis=0)
price_span = z_max - z_min
prices = (prices - z_min) / price_span
# Shuffle the row indices once, then carve the shuffled order into three
# disjoint splits: the first 270 rows train the model, the next 90 are
# used for validation, and everything from row 360 onward is the test set.
data_index = np.arange(features.shape[0])
np.random.shuffle(data_index)

train_rows = data_index[:270]
validation_rows = data_index[270:360]
test_rows = data_index[360:]

train_input, train_target = features[train_rows], prices[train_rows]
validation_input, validation_target = (
    features[validation_rows],
    prices[validation_rows],
)
test_input, test_target = features[test_rows], prices[test_rows]
# Cubic polynomial regression trained with mini-batch gradient descent.
#
# Model for one sample x (a vector with one entry per feature):
#   y = 1 . params[:, 0] + x . params[:, 1] + x^2 . params[:, 2] + x^3 . params[:, 3]
# where "1" is a vector of ones, so the bias is the sum of params[:, 0].
batch_size = 10
num_epochs = 50
lr = 0.001

num_features = train_input.shape[1]
# Any trailing rows that do not fill a whole batch are skipped.
num_batches = train_input.shape[0] // batch_size

one_mat = np.ones((batch_size, num_features))
train_input_ori = train_input
train_input_square = pow(train_input, 2)
train_input_cubed = pow(train_input, 3)

# One column per polynomial term: bias, linear, quadratic, cubic.
params = np.random.random((num_features, 4))

for j in range(num_epochs):
    for i in range(num_batches):
        batch = slice(batch_size * i, batch_size * (i + 1))
        x_ori = train_input_ori[batch]
        x_square = train_input_square[batch]
        x_cubed = train_input_cubed[batch]

        pred_prices = (
            np.dot(one_mat, params[:, 0])
            + np.dot(x_ori, params[:, 1])
            + np.dot(x_square, params[:, 2])
            + np.dot(x_cubed, params[:, 3])
        )

        # Residual per sample and the mean of the half squared error.
        loss = pred_prices - train_target[batch]
        loss_square = 0.5 * pow(loss, 2)
        los_value = np.sum(loss_square) / loss_square.shape[0]
        print(los_value)

        # np.dot(residual, X) is already the per-feature gradient (one
        # entry per weight). It must NOT be summed again: summing a 1-D
        # array over axis 0 collapses it to a scalar, which would push
        # every weight in the column by the same amount.
        delta0 = np.dot(loss, one_mat) / batch_size
        delta1 = np.dot(loss, x_ori) / batch_size
        delta2 = np.dot(loss, x_square) / batch_size
        delta3 = np.dot(loss, x_cubed) / batch_size

        params[:, 0] = params[:, 0] - lr * delta0
        params[:, 1] = params[:, 1] - lr * delta1
        params[:, 2] = params[:, 2] - lr * delta2
        params[:, 3] = params[:, 3] - lr * delta3
## valid
# Evaluate the trained cubic model on the validation split and print the
# mean of the half squared error.
# The ones matrix (bias term) is sized from the data itself instead of a
# hard-coded row count, so it stays correct if the split sizes change.
one_mat = np.ones(validation_input.shape)
valid_prices = (
    np.dot(one_mat, params[:, 0])
    + np.dot(validation_input, params[:, 1])
    + np.dot(pow(validation_input, 2), params[:, 2])
    + np.dot(pow(validation_input, 3), params[:, 3])
)
valid_loss = valid_prices - validation_target
valid_loss_square = 0.5 * pow(valid_loss, 2)
valid_los_value = np.sum(valid_loss_square) / valid_loss_square.shape[0]
print(valid_los_value)
## test
# Evaluate the trained cubic model on the test split and print the mean
# of the half squared error.
# The ones matrix (bias term) is sized from the data itself: the test
# split takes "everything from row 360 on", so its row count depends on
# how many rows the CSV has and must not be hard-coded.
one_mat = np.ones(test_input.shape)
test_prices = (
    np.dot(one_mat, params[:, 0])
    + np.dot(test_input, params[:, 1])
    + np.dot(pow(test_input, 2), params[:, 2])
    # The last column of params holds the cubic weights, so it must be
    # paired with the cubed inputs, exactly as in training.
    + np.dot(pow(test_input, 3), params[:, 3])
)
test_loss = test_prices - test_target
test_loss_square = 0.5 * pow(test_loss, 2)
test_los_value = np.sum(test_loss_square) / test_loss_square.shape[0]
print(test_los_value)
好像也沒啥需要講的:我用的是三次多項式進行預測,也就是把每個特徵的一次、二次、三次項各乘上一組權重,再加上偏置項。腦殼痛,不想寫了,其實就是懶,哈哈哈哈。如果有問題可以給我留言,知無不言,言無不盡。另外,爲了方便讀取,我將 .csv 文件中的第一行屬性名稱刪除了。
波士頓房價預測數據集鏈接爲:https://pan.baidu.com/s/1rfLfaKM4PWbmozPLeiB1eA 密碼:794l