我正在尝试让下面的代码运行。df 是一个 DataFrame，我需要使用 UDF 为它添加一个新列：
# Build a one-row DataFrame with two string columns: tweet='foo' and
# medicine_name='bar'.
df = spark.sql("select 'foo' as tweet, 'bar' as medicine_name")
# Expose the DataFrame to SQL queries under the view name "medicine".
df.createOrReplaceTempView("medicine")
def similarity(x):
    """Return ``x`` prefixed with ``'df_'``.

    Args:
        x: The string to prefix, or ``None``.

    Returns:
        ``'df_' + x``, or ``None`` when ``x`` is ``None``.
    """
    # A SQL NULL reaches a Python UDF as None; concatenating it to a str
    # would raise TypeError, so propagate the null instead.
    if x is None:
        return None
    return 'df_' + x
# Spark UDF that applies ``similarity`` to a column value and yields a string.
similarity_udf = udf(similarity, returnType=StringType())
def main():
    """Register the distinct medicine names as a view and display ``df``.

    Steps, in order:
      1. Select the distinct non-null ``medicine_name`` values from the
         ``medicine`` view and register them as the ``distinctDF`` view.
      2. Show ``df`` as-is.
      3. Show ``df`` with an extra ``m_name`` column computed by applying
         ``similarity_udf`` to the ``tweet`` column.
    """
    query = "select distinct medicine_name as medicine_name from medicine where medicine_name is not null"
    spark.sql(query).createOrReplaceTempView("distinctDF")

    df.show()

    # withColumn returns a new DataFrame; df itself is left unchanged.
    df.withColumn('m_name', similarity_udf('tweet')).show()
但是运行时收到以下错误：
Traceback (most recent call last):
File "PycharmProjects/TestCase/main.py", line 85, in <module>
main()
File "PycharmProjects/TestCase/main.py", line 50, in main
result_df.show()
File "PycharmProjects\TestCase\venv\lib\site-packages\pyspark\sql\dataframe.py", line 350, in show
print(self._jdf.showString(n, 20, vertical))
File "PycharmProjects\TestCase\venv\lib\site-packages\py4j\java_gateway.py", line 933, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "PycharmProjects\TestCase\venv\lib\site-packages\pyspark\sql\utils.py", line 63, in deco
return f(*a, **kw)
File "PycharmProjects\TestCase\venv\lib\site-packages\py4j\protocol.py", line 312, in get_return_value
format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o77.showString.
: java.lang.IllegalArgumentException
有人能指出问题出在哪里吗？