我想独自做一个搜索引擎,所以打算在完成爬虫之后建立倒排索引。但是建立索引耗时太长,因此希望大家能对下面的代码提一些改进建议。(在 Windows 上安装 Lucene 太困难、太麻烦,所以我想自己动手实现,以便学习倒排索引的原理,而不仅仅是使用 Lucene。)
# Build an inverted index (token -> page id -> occurrence count) for every row
# of the `page` table, both in memory (`word_dict`) and in Redis (`r`).
#
# NOTE(review): this script uses `mysql.connector`, `redis` and `jieba`, but no
# import lines are visible in this file -- confirm they are imported.

# In-memory inverted index: token -> {page id -> occurrence count}.
word_dict = {}
# Redis copy of the index: one hash per token, field = page id, value = count.
r = redis.Redis(host="127.0.0.1", port=6379, db=0)

# Stop-word set; filled on the first lexicon() call and reused afterwards so
# the file is not re-read and re-parsed for every page.
_stop_words = None


def _load_stop_words():
    """Return the stop words from ``stop_words.txt`` as a set (read once)."""
    global _stop_words
    if _stop_words is None:
        # NOTE(review): opened with the platform default encoding, exactly as
        # before -- confirm it matches the encoding of stop_words.txt.
        with open('stop_words.txt') as f:
            # A set gives O(1) membership tests; the previous list made every
            # token lookup linear in the number of stop words.
            _stop_words = {line.strip() for line in f}
    return _stop_words


def lexicon(idd, data):
    """Tokenize one page and add its term counts to the inverted index.

    ``data`` is converted with ``str()`` and segmented with
    ``jieba.cut_for_search``; tokens listed in ``stop_words.txt`` are skipped.
    For every remaining token the number of occurrences on this page is
    recorded in ``word_dict[token][idd]`` and in the Redis hash named after
    the token, under field ``idd``.

    Args:
        idd: Identifier of the page (the ``id`` column of the ``page`` table).
        data: Page content (the ``data`` column of the ``page`` table).
    """
    from collections import Counter

    print("Lexiconing and building index.")
    stop_words = _load_stop_words()

    # Count each token once per page instead of touching the index for every
    # single occurrence.
    counts = Counter(
        token
        for token in jieba.cut_for_search(str(data))
        if token not in stop_words
    )
    if not counts:
        return

    # Queue all Redis writes for this page and send them in one round trip;
    # one network call per token occurrence is what dominated the run time.
    pipe = r.pipeline(transaction=False)
    for token, count in counts.items():
        postings = word_dict.setdefault(token, {})
        if idd in postings:
            # Page already indexed for this token in this run: accumulate.
            postings[idd] += count
            pipe.hincrby(token, idd, count)
        else:
            # First time this run: overwrite whatever Redis held, as the
            # original hset-then-increment sequence did.
            postings[idd] = count
            pipe.hset(token, idd, count)
    pipe.execute()


# NOTE(review): database credentials are hard-coded here; consider loading
# them from configuration or the environment.
mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    passwd="121314",
    database="spider"
)
mycursor = mydb.cursor()
mycursor.execute("SELECT id, data FROM page")
result = mycursor.fetchall()

# Index every page. This loop must run after `lexicon`, `word_dict` and `r`
# are defined; previously it ran first and failed with NameError.
for row in result:
    lexicon(row[0], row[1])