我用 `F.lit(0)` 创建了一个新列,并将其转换为整数类型。但在 `printSchema()` 的输出中,该列显示为不可为空(nullable = false)。如何让这个新列变为可为空(nullable = true)?
from pyspark.sql import functions as F
from pyspark.sql import types as T
# Input schema: two string columns followed by two double columns, all
# declared nullable.
zschema = T.StructType(
    [
        T.StructField(name, dtype, True)
        for name, dtype in (
            ("col1", T.StringType()),
            ("col2", T.StringType()),
            ("time", T.DoubleType()),
            ("val", T.DoubleType()),
        )
    ]
)
rows = [("a", "b", 1.0, 2.0), ("a", "b", 2.0, 3.0)]
df = spark.createDataFrame(rows, zschema)
df.printSchema()
df.show()

# Add a constant column, then cast it to integer in a second step. This is
# the column the question reports as coming out non-nullable.
df = df.withColumn("xcol", F.lit(0))
int_xcol = F.col("xcol").cast(T.IntegerType())
df = df.withColumn("xcol", int_xcol)
df.printSchema()
df.show()
答案 0 :(得分:0)
# df.rdd.toDF() re-infers the schema from the Python row values: every column
# becomes nullable, but the integer column is widened to long as a side
# effect. Rebuilding the DataFrame with an explicit schema keeps each
# column's data type and only flips the nullable flag to True.
nullable_schema = T.StructType(
    [
        T.StructField(field.name, field.dataType, True, field.metadata)
        for field in df.schema.fields
    ]
)
df1 = spark.createDataFrame(df.rdd, nullable_schema)
df1.printSchema()
# Expected output:
# root
#  |-- col1: string (nullable = true)
#  |-- col2: string (nullable = true)
#  |-- time: double (nullable = true)
#  |-- val: double (nullable = true)
#  |-- xcol: integer (nullable = true)