def get_word_count(wordlist, final):
regex = []
count = [[] for x in xrange(len(wordlist))]
frequency = []
regex = makeregex(wordlist)
for i in range(len(final)-1):
size = os.stat(final[i]).st_size
fil = open(final[i])
if(fil):
print final[i] + " read!"
data = mmap.mmap(fil.fileno(), size, access=mmap.ACCESS_READ)
for j in range (len(wordlist)):
count[j].append(re.findall(regex[j], data))
fil.close()
for k in range(len(wordlist)):
frequency.append(sum(count[k]))
print frequency
count
是一个列表列表,每个列表都有一些数字存储在其中。我希望将每个列表的总和存储为新列表frequency
当我运行代码时出现错误:
Traceback (most recent call last):
File "C:\Users\Animesh\Desktop\_zipf.py", line 52, in <module>
get_word_count(wordlist, final)
File "C:\Users\Animesh\Desktop\_zipf.py", line 32, in get_word_count
frequency.append(sum(count[k]))
TypeError: unsupported operand type(s) for +: 'int' and 'list'
我的代码应该更改什么? 请帮忙
答案 0 :(得分:2)
count[j].append(re.findall(regex[j], data))
您正在将正则表达式的已找到单词列表添加到数组count[j]
,因此每个count
元素都是字符串列表的列表,因此在调用sum(count[k])
时会出错。
我认为您想要count[k]
附加已找到的字数:
count[j].append(len(re.findall(regex[j], data)))
答案 1 :(得分:1)
如果你想让它变得更简单,你可以摆脱count = [[] for x in xrange(len(wordlist))]
并且只有count = []
然后在for循环中你使它增加一个临时变量并将其追加到for loop。
size = 0
for j in range (len(wordlist)):
size += len(re.findall(regex[j], data)) #thanks to CharlesB for this bit
count.append(size) #you could also cut out the middle man and just append frequency