# Not much to explain — straight to the code.
from pyspark.ml.regression import LinearRegression
from pyspark.sql import SparkSession

# Build (or reuse) the SparkSession that drives this example job.
spark = (
    SparkSession.builder
    .appName("LinearRegressionWithElasticNet")
    .getOrCreate()
)

try:
    # The "libsvm" reader expects the sparse `label index:value ...` text format.
    training = spark.read.format("libsvm").load(
        "sample_linear_regression_data.txt"
    )

    # Tunable options (see the LinearRegression API docs):
    #   loss:   squaredError (default) or huber
    #   solver: auto, normal, l-bfgs
    #   elasticNetParam: mix between L1 and L2 regularization —
    #     0 -> pure L2, 1 -> pure L1;
    #     effective L1 weight = regParam * elasticNetParam,
    #     effective L2 weight = regParam * (1 - elasticNetParam).
    lr = LinearRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
    lrModel = lr.fit(training)

    # Fitted parameters of the linear model.
    print("Coefficients: %s" % str(lrModel.coefficients))
    print("Intercept: %s" % str(lrModel.intercept))

    # Summarize the model over the training set and print out some metrics.
    trainingSummary = lrModel.summary
    print("numIterations: %d" % trainingSummary.totalIterations)
    print("objectiveHistory: %s" % str(trainingSummary.objectiveHistory))
    trainingSummary.residuals.show()
    print("RMSE: %f" % trainingSummary.rootMeanSquaredError)
    print("r2: %f" % trainingSummary.r2)
finally:
    # Release cluster resources even if loading/training fails —
    # the original script never stopped the SparkSession.
    spark.stop()