Wir können eine UDF verwenden, um den Vektor zu deserialisieren und auf die Werte zuzugreifen:
>>> from pyspark.sql import functions as F
>>> from pyspark.sql.types import IntegerType
>>> df = spark.createDataFrame([(1,2,3,Vectors.dense([1211])),(2,2,4,Vectors.dense([1222])),(4,5,4,Vectors.dense([12322]))],['a','b','c','d'])
>>> df.show()
+---+---+---+---------+
| a| b| c| d|
+---+---+---+---------+
| 1| 2| 3| [1211.0]|
| 2| 2| 4| [1222.0]|
| 4| 5| 4|[12322.0]|
+---+---+---+---------+
>>> df.printSchema()
root
|-- a: long (nullable = true)
|-- b: long (nullable = true)
|-- c: long (nullable = true)
|-- d: vector (nullable = true)
>>> udf1 = F.udf(lambda x : int(x[0]),IntegerType())
>>> df.select('d',udf1('d').alias('d1')).show()
+---------+-----+
| d| d1|
+---------+-----+
| [1211.0]| 1211|
| [1222.0]| 1222|
|[12322.0]|12322|
+---------+-----+
>>> df.select('d',udf1('d').alias('d1')).printSchema()
root
|-- d: vector (nullable = true)
|-- d1: integer (nullable = true)
Was ist Predicted_Reservation_Count_Value? – Suresh