Question

我正在尝试为学校数据库制作一个简单的搜索脚本，我需要这样做，以便它可以容纳除英语之外的其他语言，特别是日语。我是用这个代码做的。

这是elasticsearch中索引的代码：

import elasticsearch
es = elasticsearch.Elasticsearch() # use default of localhost, port 9200
idnumbr= 1
for s in schools:
    es.index(index="schools", doc_type="school", id=idnumbr, body= {
        "zip": str(s[2]),
        "names" : s[0].encode('utf-8'),
        "city": str(s[1])
       }
         )
    idnumbr= idnumbr + 1

这是实际搜索的代码：

# -*- coding: utf8 -*-
import codecs
class School(object):
    def __init__(self, school_id, name, city, zip_code):
        self.id = school_id
        self.name = name
        self.city = city
        self.zip = zip_code
    def search(self, zips):
        import elasticsearch
        import difflib
        es = elasticsearch.Elasticsearch()
        import elasticsearch_dsl as srch
        s = srch.Search(using= es, index= "schools")\
        .query("match", names= self.name)
        response = s.execute()
        possible_matches = []
        for hit in response:
            hit.names = hit.names.encode("iso2022_jp_2")
            hit.city = hit.city.encode("iso2022_jp_2")
            hit.zip = hit.zip.encode("iso2022_jp_2")
            possible_matches.append([hit.names, hit.city, hit.zip])
    s2 = srch.Search(using= es, index= "schools")\
        .query("match", address= unicode(self.city))
    response2 = s2.execute()
    possible_matches2 = []
    for hit in response2:
        possible_matches2.append([str(hit.names), str(hit.city), str(hit.zip)])
    possible_matches3 = []
    for n in zips:
        s = difflib.SequenceMatcher(None, n, self.zip)
        ratios = (s.ratio(), str(n))
        possible_matches3.append(ratios)
    possible_matches3.sort()
    possible_matches3.reverse()
    possible_matches4 = []
    for l in possible_matches3:
        possible_matches4.append(l[1])
    possible_matches5 = []
    for m in possible_matches4:
        for n in possible_matches2:
            if n[2] == m:
                possible_matches5.append(n)
    if possible_matches != [] and possible_matches2 != []:
        matches = [i for i in possible_matches if i in possible_matches2]
        if matches == []:
            for s in possible_matches2:
                possible_matches.append(s)
            matches = possible_matches
    elif possible_matches2 == []:
        matches = possible_matches
    else:
        matches = possible_matches2
    if n == "" or n == " ":
        possible_matches5 = []
    if possible_matches5 != []:
        cross_match = [i for i in matches if i in possible_matches5]
    else:
        cross_match = matches
    cross_match2 = [', '.join(x) for x in cross_match]
    for n in cross_match2:
        hit.names = hit.names.encode("iso2022_jp_2")
    return cross_match2
def search_name_only(self):
    import elasticsearch
    import difflib
    es = elasticsearch.Elasticsearch()
    import elasticsearch_dsl as srch
    s = srch.Search(using= es, index= "schools")\
    .query("match", names= self.name)
    response = s.execute()
    possible_matches = []
    for hit in response:
        try:
            str(hit.names)
        except UnicodeError:
            codecs.encode(hit.names, "utf-8")
        possible_matches.append([hit.names, str(hit.city), str(hit.zip)])
    name_search = [', '.join(x) for x in possible_matches]
    return name_search
def search_city_only(self):
    import elasticsearch
    import difflib
    es = elasticsearch.Elasticsearch()
    import elasticsearch_dsl as srch
    s = srch.Search(using= es, index= "schools")\
    .query("match", city= unicode(self.city))
    response = s.execute()
    possible_matches = []
    for hit in response:
        possible_matches.append([str(hit.names), str(hit.city), str(hit.zip)])
    name_search = [', '.join(x) for x in possible_matches]
    return name_search
def search_zip_only(self, zips, schools):
    possible_matches3 = []
    import difflib
    for n in zips:
        s = difflib.SequenceMatcher(None, n, self.zip)
        ratios = (s.ratio(), str(n))
        possible_matches3.append(ratios)
    possible_matches3.sort()
    possible_matches3.reverse()
    possible_matches4 = []
    for l in possible_matches3:
        possible_matches4.append(l[1])
    possible_matches5 = []
    for m in possible_matches4:
        for n in schools:
            if n[2] == m and len(possible_matches5) < 10:
                o = []
                for s in n:
                    o.append(str(s))
                possible_matches5.append(o)
    if self == "" or self == " ":
        possible_matches5 = []
    zip_search = [', '.join(x) for x in possible_matches5]
    if self.zip == "" or self.zip == " ":
        zip_search = []
    return zip_search

这是实际使用School类的代码：

from schoolclass import School

import json
import mysql.connector as mdb

cnx = mdb.connect(user= 'root', password= 'standard', host= '127.0.0.1', database= 'sync-helper')
cursor = cnx.cursor()
cursor.execute("SELECT name, City, Zip FROM school")
schools = list(cursor.fetchall())
zips = []
for z in schools:
    zips.append(str(z[2]))

school = School(3, "聖ウルスラ学院英智中学校", "", "")

report = {}
good_search = school.search(zips)
if len(good_search) == 1:
good_search = good_search[0]
report['Comprehensive Search'] = good_search
#report['Name Based Search'] = school.search_name_only()
#report['City Based Search'] = school.search_city_only()
#report['Zip Code Based Search'] = school.search_zip_only(zips, schools)

final_report= json.dumps(report, sort_keys=False,indent=4, separators=(',', ': '))

print final_report

当我调用此函数时，我目前得到的是：

{
    "Comprehensive Search": [
        "\u001b$B@;%&%k%9%i3X1!1QCRCf3X9;\u001b(B , None, None",
        "\u001b$B@;%&%k%9%i3X1!1QCR>.3X9;\u001b(B, None, None"
    ]
}

请帮我解决这个问题并获得＃34;圣ウルス学院英智中学校＆＃34;！

我如何编码以便返回日语字符而不是乱码？

0 个答案: