I'm working on some Spark code and I keep getting this error:
TypeError: 'float' object is not iterable
on the line with the reduceByKey() call. Can anyone help?
Here is the stack trace of the error:
File "/home/hw/SC/SC_spark.py", line 535, in <lambda>
    d[k] = comb(d[k], v) if k in d else creator(v)
TypeError: 'float' object is not iterable
Here is the code:
def field_valid(m):
    # m = (s_name, dis, TxP, ef, pl) as returned by parse_input
    dis = m[1]
    TxP = m[2]
    ef = m[3]
    pl = m[4]
    if TxP != 'NaN' and dis != 'NaN' and ef != 'NaN' and pl != 'NaN':
        return True
    else:
        return False
def parse_input(d):
    # d is the list of fields from one CSV row
    s_name = 'S' + d[6]  # serving cell name
    if d[2] == 'NaN' or d[2] == '':
        ef = 'NaN'
    else:
        ef = float(d[2].strip())
    if d[7] == 'NaN' or d[7] == '' or d[7] == '0':
        TxP = 'NaN'
    else:
        TxP = float(d[7].strip())
    if d[9] == 'NaN' or d[9] == '':
        dis = 'NaN'
    else:
        dis = float(d[9].strip())
    if d[10] == 'NaN' or d[10] == '':
        pl = 'NaN'
    else:
        pl = float(d[10].strip())
    return s_name, dis, TxP, ef, pl
from pyspark import SparkContext

sc = SparkContext(appName="SC_spark")
lines = sc.textFile(ip_file)
lines = lines.map(lambda m: m.split(","))
lines = lines.filter(lambda m: m[6] != 'cell_name')  # skip the CSV header row
my_rdd = lines.map(parse_input).filter(field_valid)
my_rdd = my_rdd.map(lambda x: (x[0], (x[1], x[2])))  # (s_name, (dis, TxP))
my_rdd = my_rdd.reduceByKey(lambda x, y: (max(x[0], y[0]), sum(x[1], y[1])))  # this line raises the error
Here is some sample data:
Class,PB,EF,RP,RQ,ID,cell_name,TxP,BW,DIS,PL,geom
NaN,10,5110,-78.0,-7.0,134381669,S417|134381669|5110,62.78151250383644,10,2578.5795095469166,113.0,NaN
NaN,10,5110,-71.0,-6.599999904632568,134381669,S417|134381669|5110,62.78151250383644,10,2689.630258510342,106.0,NaN
NaN,10,5110,-77.0,-7.300000190734863,134381669,S417|134381669|5110,62.78151250383644,10,2907.8184899249713,112.0,19.299999999999983
NaN,10,5110,-91.0,-11.0,134381669,S417|134381669|5110,62.78151250383644,10,2779.96762695867,126.0,5.799999999999997
NaN,10,5110,-90.0,-12.69999980926514,134381669,S417|134381669|5110,62.78151250383644,10,2749.8351648579583,125.0,9.599999999999994
NaN,10,5110,-95.0,-13.80000019073486,134381669,S417|134381669|5110,62.78151250383644,10,2942.7938902934643,130.0,-2.4000000000000057
NaN,10,5110,-70.0,-7.099999904632568,134381669,S417|134381669|5110,62.78151250383644,10,3151.930706017461,105.0,22.69999999999999
Answer 0 (score: 0)
The expected result is the sum and the maximum of the values. In this case you are looking for x[1] + y[1], not the built-in sum() function: sum(iterable, start) takes an iterable as its first argument, so sum(x[1], y[1]) tries to iterate over the float x[1] and raises the TypeError above.
my_rdd.reduceByKey(lambda x, y: (max(x[0], y[0]), x[1] + y[1]))
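As a quick sanity check without a Spark cluster, you can exercise both reducers with functools.reduce, since reduceByKey applies the same two-argument function pairwise to the values of each key. This is just a minimal sketch; the (DIS, TxP) pairs below are copied from the first two sample rows above:

from functools import reduce

# (DIS, TxP) pairs taken from the first two sample rows
pairs = [
    (2578.5795095469166, 62.78151250383644),
    (2689.630258510342, 62.78151250383644),
]

# Broken reducer: sum()'s signature is sum(iterable, start), so sum(x[1], y[1])
# tries to iterate over the float x[1] and fails.
try:
    reduce(lambda x, y: (max(x[0], y[0]), sum(x[1], y[1])), pairs)
except TypeError as e:
    print(e)  # 'float' object is not iterable

# Fixed reducer: plain addition of the two floats.
print(reduce(lambda x, y: (max(x[0], y[0]), x[1] + y[1]), pairs))
# (2689.630258510342, 125.56302500767288)

Running this locally reproduces both the error and the fix, which makes it easy to confirm the reducer logic before submitting the Spark job.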