我正在编写一段代码,用空字符串 '' 替换匹配以下模式(pattern)的字符:`[^\w | ]`。问题是,当我把 DataFrame `sentenceDF` 的列传入我的函数 `removePunctuation` 时,会得到以下错误:`'Column' object is not callable`('Column' 对象不可调用)。
from pyspark.sql.functions import regexp_replace, trim, col, lower
# Question's original attempt, kept exactly as posted: it raises the
# TypeError shown in the traceback further down.
# The caller passes col('sentence'), so `column` is a Column, not a DataFrame.
def removePunctuation(column):
cleanString = column
# Failing line per the traceback: .select() is invoked on the Column held in
# cleanString, which raises "TypeError: 'Column' object is not callable".
# Note it also reads the global sentenceDF instead of the `column` argument.
cleanString = cleanString.select(regexp_replace(sentenceDF['sentence'],'[^\w | ]','').alias('sentence'))
# Intended second pass: strip underscores (never reached because of the error above).
cleanString = cleanString.select(regexp_replace(cleanString['sentence'],'_','').alias('sentence'))
# Intended third pass: lowercase the text (no alias is applied here).
cleanString = cleanString.select(lower(cleanString['sentence']))
return cleanString
# Build a three-row DataFrame with a single 'sentence' column of sample strings.
sentenceDF = sqlContext.createDataFrame([('Hi, you!',),
(' No under_score!',),
(' * Remove punctuation then spaces * ',)], ['sentence'])
# Passes a Column (col('sentence')) into removePunctuation; the traceback
# below starts at this call.
result = sentenceDF.select(removePunctuation(col('sentence')))
result.show()
TraceBack:
TypeError: 'Column' object is not callable
--------------------------------------------------------------------------- TypeError Traceback (most recent call last)
<ipython-input-50-aa978fac8bae> in <module>()
15 (' * Remove punctuation then spaces * ',)], ['sentence'])
16
---> 17 result = sentenceDF.select(removePunctuation(col('sentence')))
18 result.show()
<ipython-input-50-aa978fac8bae> in removePunctuation(column)
4 def removePunctuation(column):
5 cleanString = column
----> 6 cleanString = cleanString.select(regexp_replace(sentenceDF['sentence'],'[^\w | ]','').alias('sentence'))
7 cleanString = cleanString.select(regexp_replace(cleanString['sentence'],'_','').alias('sentence'))
8 cleanString = cleanString.select(lower(cleanString['sentence'])) TypeError: 'Column' object is not callable
Command took 0.09 seconds -- by andres.velez.e@gmail.com at 10/30/2016, 2:48:17 PM on My Cluster (6 GB)
答案 0 :(得分:1)
直接执行下面这一行,你会得到同样的错误:
# Minimal reproduction: select() is called on a Column instead of a DataFrame.
col('sentence').select()
建议:在把代码重构成函数之前,先试着直接把代码写出来并运行。
无论如何,我认为这就是你想要的。
def removePunctuation(df, column):
    """Return a one-column DataFrame holding a cleaned copy of ``column``.

    The text is lower-cased and trimmed, then every non-word character,
    every run of whitespace and every underscore is removed. Because
    whitespace is removed as well, the words of a sentence end up
    concatenated (e.g. 'Hi, you!' becomes 'hiyou').

    Args:
        df: DataFrame that contains the column to clean.
        column: Name of the string column to clean. The resulting column
            keeps this name.

    Returns:
        A new DataFrame with a single column named ``column``.
    """
    # Use the caller-supplied column name rather than a hard-coded
    # 'sentence', so the function works for any string column.
    normalized = trim(lower(col(column)))
    # Raw string: keeps \w and \s as regex classes without relying on
    # Python passing unknown escape sequences through unchanged.
    cleaned = regexp_replace(normalized, r'[^\w]|\s+|_', '')
    return df.select(cleaned.alias(column))
# Clean the 'sentence' column of sentenceDF and print the resulting DataFrame.
result = removePunctuation(df=sentenceDF, column='sentence')
result.show()
+--------------------+
| sentence|
+--------------------+
| hiyou|
| nounderscore|
|removepunctuation...|
+--------------------+