"""Normalize a vector to have unit norm using the given p-norm."""
# Normalize a vector to have unit norm using the given p-norm
from pyspark.ml.feature import Normalizer
from pyspark.ml.linalg import Vectors

# NOTE(review): `spark` (used to build the DataFrame) and `temp_path` (base
# directory for the save/load round trip) are not defined in this snippet;
# presumably the surrounding shell session or test harness provides them --
# confirm before running this as a standalone script.

# One-row DataFrame holding a dense and a sparse vector column.
svec = Vectors.sparse(4, {1: 4.0, 3: 3.0})
df = spark.createDataFrame(
    [(Vectors.dense([3.0, -4.0]), svec)],
    ["dense", "sparse"],
)

# p=2 normalization of the dense column: [3, -4] has Euclidean norm 5.
normalizer = Normalizer(p=2.0, inputCol="dense", outputCol="features")
normalizer.transform(df).head().features
# DenseVector([0.6, -0.8])

# Point the normalizer at the sparse column and transform in one chained
# call; {1: 4.0, 3: 3.0} also has Euclidean norm 5.
(
    normalizer.setParams(inputCol="sparse", outputCol="freqs")
    .transform(df)
    .head()
    .freqs
)
# SparseVector(4, {1: 0.8, 3: 0.6})

# Supply p, inputCol and outputCol through a param map passed to transform().
# With p=1 the norm of [3, -4] is |3| + |-4| = 7.
params = {
    normalizer.p: 1.0,
    normalizer.inputCol: "dense",
    normalizer.outputCol: "vector",
}
normalizer.transform(df, params).head().vector
# DenseVector([0.4286, -0.5714])

# Save the normalizer, load it back, and compare the p parameter.
normalizerPath = temp_path + "/normalizer"
normalizer.save(normalizerPath)
loadedNormalizer = Normalizer.load(normalizerPath)
loadedNormalizer.getP() == normalizer.getP()
# True