我有以下我正在尝试运行的代码
SUM(distinct tb.b2)
然而,无论我改变什么,我实现我收到以下错误。有人可以指出这里有什么问题吗?
import json
import textProcess as tp
review = open('../inres_review.json')
vocabulary = open('../vocabulary.txt','w+')
label = open('../label.txt','w+')
data = open('../data.txt','w+')
voc = []
revs = []
lab = []
dat = []
i=1
for line in review:
jre = json.loads(line)
jstar = jre['stars']
text = jre['text']
lab.append(jstar)
ws = tp.removeStopPunc(text)
revs.append(ws)
voc += ws
i += 1
for i in lab:
label.write(str(i)+"\n")
print ("label created successfully!")
voc = list(set(voc))
print (len(voc))
print (type(i))
for i in voc:
vocabulary.write(i.encode('UTF-8')+"\n")
print ("Vocabulary created successfully!")
for revid, rev in enumerate(revs):
dat.append({})
for w in rev:
if w in voca:
k = voca.index(w)+1
if k not in dat[revid]:
dat[revid][k] = 1
else:
dat[revid][k] += 1
print (len(revs))
for revid, rev in enumerate(dat):
for k,v in rev.iteritems():
s = str(revid+1)+' '+str(k)+' '+str(v)+'\n'
data.write(s)
print ("successfully create data")
review.close()
vocabulary.close()
label.close()
data.close()
感谢任何帮助!
答案 0 :(得分:4)
encode
返回bytes
,因此您还需要将'\n'
转换为bytes
:
i.encode('UTF-8') + b"\n"