概念部分參考知乎一大哥的文章:
https://zhuanlan.zhihu.com/p/30535220
代碼實現(哈哈,我懶得敲,也給拿過來了):
def lasso_regression(X, y, lambd=0.2, threshold=0.1, max_iter=None):
    """Estimate LASSO regression coefficients by cyclic coordinate descent.

    Minimises ``||y - X w||^2 + lambd * ||w||_1``. Each sweep visits every
    coordinate ``k`` once and sets
    ``w_k = sign(p_k) * max(|p_k| - lambd/2, 0) / z_k``, where ``z_k`` is the
    squared norm of column ``k`` of ``X`` and ``p_k`` is the inner product of
    that column with the residual computed with coordinate ``k`` left out.

    Args:
        X: (m, n) design matrix; an ``np.matrix`` or any 2-D array-like.
        y: m target values, as an (m, 1) column or a flat sequence.
        lambd: Weight of the L1 penalty.
        threshold: Stop once the residual sum of squares changes by less
            than this amount between two consecutive sweeps.
        max_iter: Optional cap on the number of sweeps. ``None`` (the
            default) iterates until ``threshold`` is met.

    Returns:
        The coefficient vector as an ``np.matrix`` of shape (n, 1).

    Raises:
        ValueError: If the change in the residual sum of squares is not
            finite (e.g. NaN/inf in the inputs); the stopping test could
            otherwise never succeed.

    Side effects:
        Prints one progress line per sweep.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    n = X.shape[1]
    half = lambd / 2.0

    # Start from w = 0, so the initial residual is y itself.
    w = np.zeros((n, 1))
    residual = y - X.dot(w)
    r = float(np.sum(residual * residual))

    # z_k = ||X[:, k]||^2 does not depend on w: compute it once.
    z = np.sum(X * X, axis=0)

    for it in itertools.count(1):
        for k in range(n):
            if z[k] == 0:
                # An all-zero column cannot influence the fit; keep w_k = 0.
                continue

            col = X[:, k:k + 1]
            w_old = w[k, 0]
            # Inner product of column k with the residual that excludes
            # coordinate k's own contribution (added back via z_k * w_old).
            p_k = float(np.sum(col * residual)) + z[k] * w_old

            # Soft-thresholding at lambd / 2.
            if p_k < -half:
                w_new = (p_k + half) / z[k]
            elif p_k > half:
                w_new = (p_k - half) / z[k]
            else:
                w_new = 0.0

            if w_new != w_old:
                # Keep the running residual consistent with the new w_k.
                residual -= col * (w_new - w_old)
                w[k, 0] = w_new

        # Recompute the residual from scratch once per sweep so rounding
        # errors from the incremental updates do not accumulate.
        residual = y - X.dot(w)
        r_prime = float(np.sum(residual * residual))
        delta = abs(r_prime - r)
        r = r_prime
        print('Iteration: {}, delta = {}'.format(it, delta))

        if not np.isfinite(delta):
            raise ValueError(
                'residual sum of squares is not finite; '
                'check X and y for NaN or inf values')
        if delta < threshold:
            break
        if max_iter is not None and it >= max_iter:
            break

    return np.matrix(w)
if '__main__' == __name__:
    # Load the data set and pass both X and y through standarize()
    # (load_data and standarize are helpers defined elsewhere).
    X, y = load_data('abalone.txt')
    X, y = standarize(X), standarize(y)

    w = lasso_regression(X, y, lambd=10)

    # Fitted values. NOTE(review): `*` is only a matrix product if X is an
    # np.matrix -- confirm that load_data/standarize return np.matrix.
    y_prime = X*w

    # Correlation coefficient between the targets and the fitted values.
    corrcoef = get_corrcoef(np.array(y.reshape(1, -1)),
                            np.array(y_prime.reshape(1, -1)))
    print('Correlation coefficient: {}'.format(corrcoef))

    # LASSO coefficient trajectories.
    # As with ridge traces, varying lambda yields different regression
    # coefficients; plotting them shows the path each coefficient follows.
    ntest = 30

    # Draw the trajectories.
    ws = lasso_traj(X, y, ntest)
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # x-axis values -10 .. ntest-11; presumably the log of the lambda values
    # used inside lasso_traj -- TODO confirm against its definition.
    lambdas = [i-10 for i in range(ntest)]
    ax.plot(lambdas, ws)

    plt.show()
    # With LASSO, coefficients keep dropping to exactly 0.