我最近开发了一个Python程序,它在某个文档中用术语表示反向索引。我现在想要创建职位发布,例如
to, 993427:
⟨ 1, 6: ⟨7, 18, 33, 72, 86, 231⟩;
2, 5: ⟨1, 17, 74, 222, 255⟩; 4, 5: ⟨8, 16, 190, 429, 433⟩; 5, 2: ⟨363, 367⟩;
7, 3: ⟨13, 23, 191⟩; …⟩
我知道代码不完整,如上所述,我只是想实现功能。
from pprint import pprint as pp
from collections import Counter
import pprint
import re
import sys
import string
import fileinput
try:
reduce
except:
from functools import reduce
try:
raw_input
except:
raw_input = input
def readIn(fileglob): #Reads in multiple files and strips punctation/uppercase.
texts, words = {}, set()
for txtfile in (fileglob):
with open(txtfile, 'r') as splitWords:
txt = splitWords.read().lower().split()
txt = str(txt)
txt = re.findall(r'\w+', txt)
words |= set(txt)
texts[txtfile.split('\\')[-1]] = txt
return texts, words
def search(indexes): # Inverted index, based off the book and the web.
return reduce(set.intersection,
(index[word] for word in indexes),
set(texts.keys()))
def getWordBins(posOfWords):
cnt = Counter()
for word in posOfWords:
cnt[posOfWords] += 1
return cnt
def main(fileList, topWords):
tempArray = []
for x in range(1,len(fileList)):
tempArray.append(fileList[x])
texts, words = readIn(tempArray)
index = {word:set(txt
for txt, wrds in texts.items() if word in wrds)
for word in words}
test =({k + " " + str(len(v)) + " " + str(sorted(v)) for k,v in index.items()})
txt = readIn(fileList)
posWord = getWordBins(txt)
for key, value in posWord.most_common(topWords):
print key, value
#Writes out the information requested to a ".idx" file.
doc = open("document.idx", "w")
doc.write("# INPUT DOCUMENT REFERENCE LEGEND\n")
for fileNumber in range(1, len(fileList)):
doc.write(str(fileNumber) + "\t" + fileList[fileNumber] + "\n")
doc.write("# INVERTED INDEX RESULTS\n")
tempTest = []
for x in test:
tempTest.append(x.split(" "))
for x in tempTest:
tempStr = ""
for y in x:
tempStr += y + "\t"
doc.write(tempStr + "\n")
doc.close
main(sys.argv, sys.argv)
这是我到目前为止,唯一的新功能是getWordBins函数和循环:
txt = readIn(fileList)
posWord = getWordBins(txt)
for key, value in posWord.most_common(topWords):
print key, value
现在,当我尝试运行代码时会发生什么:
Traceback (most recent call last):
File "Intro3.py", line 82, in <module>
main(sys.argv, sys.argv)
File "Intro3.py", line 60, in main
posWord = getWordBins(txt)
File "Intro3.py", line 41, in getWordBins
cnt[posOfWords] += 1
TypeError: unhashable type: 'dict'
很高兴收到任何关于这个令人不安的错误的指导。它不是字典,为什么错误呢? 谢谢你的时间!
答案 0 :(得分:1)
你在做什么:
cnt[posOfWords] += 1
我想你可能意味着:
cnt[word] += 1
你的readin函数也返回一个dict和一个集合,所以你的txt变量是(dict
,set
)
所以你的问题归结为试图使用一个持有字典作为键的元组(我怀疑是你的意图)。它对cnt[word] += 1
不起作用,因为那仍然会尝试使用dict作为关键。你可能需要这样做:
txt, _ = readIn(fileList)
然后这可能会起作用:
cnt[word] += 1