# The learning objective is to minimize the squared error, with regularization.
from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import LinearRegression, LinearRegressionModel
# Example: fit a weighted linear regression on a tiny two-row dataset.
# Assumes `spark` (a SparkSession) and `temp_path` (a writable scratch
# directory) are provided by the surrounding context -- TODO confirm.
df = spark.createDataFrame([
    (1.0, 2.0, Vectors.dense(1.0)),
    (0.0, 2.0, Vectors.sparse(1, [], []))], ["label", "weight", "features"])
lr = LinearRegression(maxIter=5, regParam=0.0, solver="normal", weightCol="weight")
model = lr.fit(df)
# FIX: rows must be 1-tuples -- the original `[Vectors.dense(-1.0),)]` had a
# stray ')' and no tuple wrapper (compare the correct form used for test1).
test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
abs(model.transform(test0).head().prediction - (-1.0)) < 0.001
# True
abs(model.coefficients[0] - 1.0) < 0.001
# True
abs(model.intercept - 0.0) < 0.001
# True
test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
abs(model.transform(test1).head().prediction - 1.0) < 0.001
# True
# Positional arguments are rejected by setParams -- this call raises:
lr.setParams("vector")
# Traceback (most recent call last):
# ...
# TypeError: Method setParams forces keyword arguments.
# Round-trip the estimator through save/load and check a param survives.
lr_path = temp_path + "/lr"
lr.save(lr_path)
lr2 = LinearRegression.load(lr_path)
lr2.getMaxIter()
# 5
# Round-trip the fitted model and check its parameters survive.
model_path = temp_path + "/lr_model"
model.save(model_path)
model2 = LinearRegressionModel.load(model_path)
# FIX: attribute is `coefficients` (plural); `model.coefficient` would raise
# AttributeError -- the plural form is already used above on this same model.
model.coefficients[0] == model2.coefficients[0]
# True
model.intercept == model2.intercept
# True
model.numFeatures
# 1