我正在尝试为学校数据库制作一个简单的搜索脚本,我需要这样做,以便它可以容纳除英语之外的其他语言,特别是日语。我是用这个代码做的。
这是elasticsearch中索引的代码:
import elasticsearch
es = elasticsearch.Elasticsearch() # use default of localhost, port 9200
idnumbr= 1
for s in schools:
es.index(index="schools", doc_type="school", id=idnumbr, body= {
"zip": str(s[2]),
"names" : s[0].encode('utf-8'),
"city": str(s[1])
}
)
idnumbr= idnumbr + 1
这是实际搜索的代码:
# -*- coding: utf8 -*-
import codecs
class School(object):
def __init__(self, school_id, name, city, zip_code):
self.id = school_id
self.name = name
self.city = city
self.zip = zip_code
def search(self, zips):
import elasticsearch
import difflib
es = elasticsearch.Elasticsearch()
import elasticsearch_dsl as srch
s = srch.Search(using= es, index= "schools")\
.query("match", names= self.name)
response = s.execute()
possible_matches = []
for hit in response:
hit.names = hit.names.encode("iso2022_jp_2")
hit.city = hit.city.encode("iso2022_jp_2")
hit.zip = hit.zip.encode("iso2022_jp_2")
possible_matches.append([hit.names, hit.city, hit.zip])
s2 = srch.Search(using= es, index= "schools")\
.query("match", address= unicode(self.city))
response2 = s2.execute()
possible_matches2 = []
for hit in response2:
possible_matches2.append([str(hit.names), str(hit.city), str(hit.zip)])
possible_matches3 = []
for n in zips:
s = difflib.SequenceMatcher(None, n, self.zip)
ratios = (s.ratio(), str(n))
possible_matches3.append(ratios)
possible_matches3.sort()
possible_matches3.reverse()
possible_matches4 = []
for l in possible_matches3:
possible_matches4.append(l[1])
possible_matches5 = []
for m in possible_matches4:
for n in possible_matches2:
if n[2] == m:
possible_matches5.append(n)
if possible_matches != [] and possible_matches2 != []:
matches = [i for i in possible_matches if i in possible_matches2]
if matches == []:
for s in possible_matches2:
possible_matches.append(s)
matches = possible_matches
elif possible_matches2 == []:
matches = possible_matches
else:
matches = possible_matches2
if n == "" or n == " ":
possible_matches5 = []
if possible_matches5 != []:
cross_match = [i for i in matches if i in possible_matches5]
else:
cross_match = matches
cross_match2 = [', '.join(x) for x in cross_match]
for n in cross_match2:
hit.names = hit.names.encode("iso2022_jp_2")
return cross_match2
def search_name_only(self):
import elasticsearch
import difflib
es = elasticsearch.Elasticsearch()
import elasticsearch_dsl as srch
s = srch.Search(using= es, index= "schools")\
.query("match", names= self.name)
response = s.execute()
possible_matches = []
for hit in response:
try:
str(hit.names)
except UnicodeError:
codecs.encode(hit.names, "utf-8")
possible_matches.append([hit.names, str(hit.city), str(hit.zip)])
name_search = [', '.join(x) for x in possible_matches]
return name_search
def search_city_only(self):
import elasticsearch
import difflib
es = elasticsearch.Elasticsearch()
import elasticsearch_dsl as srch
s = srch.Search(using= es, index= "schools")\
.query("match", city= unicode(self.city))
response = s.execute()
possible_matches = []
for hit in response:
possible_matches.append([str(hit.names), str(hit.city), str(hit.zip)])
name_search = [', '.join(x) for x in possible_matches]
return name_search
def search_zip_only(self, zips, schools):
possible_matches3 = []
import difflib
for n in zips:
s = difflib.SequenceMatcher(None, n, self.zip)
ratios = (s.ratio(), str(n))
possible_matches3.append(ratios)
possible_matches3.sort()
possible_matches3.reverse()
possible_matches4 = []
for l in possible_matches3:
possible_matches4.append(l[1])
possible_matches5 = []
for m in possible_matches4:
for n in schools:
if n[2] == m and len(possible_matches5) < 10:
o = []
for s in n:
o.append(str(s))
possible_matches5.append(o)
if self == "" or self == " ":
possible_matches5 = []
zip_search = [', '.join(x) for x in possible_matches5]
if self.zip == "" or self.zip == " ":
zip_search = []
return zip_search
这是实际使用School类的代码:
from schoolclass import School
import json
import mysql.connector as mdb
cnx = mdb.connect(user= 'root', password= 'standard', host= '127.0.0.1', database= 'sync-helper')
cursor = cnx.cursor()
cursor.execute("SELECT name, City, Zip FROM school")
schools = list(cursor.fetchall())
zips = []
for z in schools:
zips.append(str(z[2]))
school = School(3, "聖ウルスラ学院英智中学校", "", "")
report = {}
good_search = school.search(zips)
if len(good_search) == 1:
good_search = good_search[0]
report['Comprehensive Search'] = good_search
#report['Name Based Search'] = school.search_name_only()
#report['City Based Search'] = school.search_city_only()
#report['Zip Code Based Search'] = school.search_zip_only(zips, schools)
final_report= json.dumps(report, sort_keys=False,indent=4, separators=(',', ': '))
print final_report
当我调用此函数时,我目前得到的是:
{
"Comprehensive Search": [
"\u001b$B@;%&%k%9%i3X1!1QCRCf3X9;\u001b(B , None, None",
"\u001b$B@;%&%k%9%i3X1!1QCR>.3X9;\u001b(B, None, None"
]
}
请帮我解决这个问题并获得#34;圣ウルス学院英智中学校&#34;!