我在项目中使用 Wagtail 默认的数据库搜索后端来实现搜索功能,视图代码如下:
from __future__ import absolute_import, unicode_literals
from django.core.paginator import EmptyPage, PageNotAnInteger, Paginator
from django.shortcuts import render
from home.models import BlogPage, get_all_tags
from wagtail.wagtailsearch.models import Query
def search(request):
    """Render the blog search results page.

    Reads the ``query`` and ``page`` GET parameters, searches live
    ``BlogPage`` objects when a query is present, paginates the results
    ten per page and renders ``search/search.html``.
    """
    params = request.GET
    term = params.get('query', None)
    page_number = params.get('page', 1)

    if not term:
        # Nothing to search for: paginate an empty queryset instead.
        matches = BlogPage.objects.none()
    else:
        matches = BlogPage.objects.live().search(term)
        # Record hit
        Query.get(term).add_hit()

    paginator = Paginator(matches, 10)
    try:
        current_page = paginator.page(page_number)
    except PageNotAnInteger:
        # Non-numeric page parameter: fall back to the first page.
        current_page = paginator.page(1)
    except EmptyPage:
        # Page number out of range: fall back to the last page.
        current_page = paginator.page(paginator.num_pages)

    context = {
        'search_query': term,
        'blogpages': current_page,
        'tags': get_all_tags()
    }
    return render(request, 'search/search.html', context)
BlogPage:
class BlogPage(Page):
    """Blog post page whose ``intro`` and ``body`` are registered for search."""

    date = models.DateField("Post date")
    intro = models.CharField(max_length=250)
    # Body content is a stream of heading, paragraph, image and code blocks.
    body = StreamField([
        ('heading', blocks.CharBlock(classname="full title")),
        ('paragraph', blocks.RichTextBlock()),
        ('image', ImageChooserBlock()),
        ('code', CodeBlock()),
    ])
    tags = ClusterTaggableManager(through=BlogPageTag, blank=True)
    # Extend the search fields inherited from Page with this model's own
    # text fields.
    search_fields = Page.search_fields + [
        index.SearchField('intro'),
        index.SearchField('body'),
    ]
...
只有当 BlogPage 模型的 body 字段内容为英文时,搜索才能正常工作;如果 body 字段中包含俄语单词,就搜索不到任何结果。
我查看了数据库,发现 BlogPage 的 body 字段中存储的内容如下所示:
[{"value": "\u0442\u0435\u0441\u0442\u043e\u0432\u044b\u0439", "id": "3343151a-edbc-4165-89f2-ce766922d68e", "type": "heading"}, {"value": "<p>\u0442\u0435\u0441\u0442\u0438\u043f\u0440</p>", "id": "22d3818d-8c69-4d72-967e-7c1f807e80b2", "type": "paragraph"}]
所以,问题在于 Wagtail 把 StreamField 字段中的非 ASCII 字符保存成了 \uXXXX 形式的 Unicode 转义序列。如果我在 phpMyAdmin 中手动把该字段改为:
[{"value": "Тест", "id": "3343151a-edbc-4165-89f2-ce766922d68e", "type": "heading"}, {"value": "<p>Тестовый</p>", "id": "22d3818d-8c69-4d72-967e-7c1f807e80b2", "type": "paragraph"}]
搜索就能正常工作了。所以,有人知道如何阻止 Wagtail 以 Unicode 转义形式保存 StreamField 字段吗?
答案 0 :(得分:1)
我不喜欢这种变通办法,但最终还是决定额外添加 search_body 和 search_intro 两个字段,然后用它们来进行搜索:
class BlogPage(Page):
    """Blog post page that keeps lower-cased text copies of itself for search.

    ``search_intro`` and ``search_body`` are rebuilt from ``intro`` and
    ``body`` on every save and are the fields registered for search, in
    place of ``intro`` and ``body`` themselves.
    """

    date = models.DateField("Post date")
    intro = models.CharField(max_length=250)
    body = StreamField([
        ('heading', blocks.CharBlock(classname="full title")),
        ('paragraph', blocks.RichTextBlock()),
        ('image', ImageChooserBlock()),
        ('code', CodeBlock()),
    ])
    # Lower-cased mirrors of ``intro`` / ``body``; maintained by ``save()``.
    search_intro = models.CharField(max_length=250)
    # NOTE(review): a TextField may suit long bodies better than a very wide
    # CharField; changing it would require a migration, so it is left as is.
    search_body = models.CharField(max_length=50000)
    tags = ClusterTaggableManager(through=BlogPageTag, blank=True)

    def main_image(self):
        """Return the image of the first gallery item, or None if there is none."""
        gallery_item = self.gallery_images.first()
        return gallery_item.image if gallery_item else None

    def get_context(self, request):
        """Extend the page context with all tags and the absolute page URL."""
        context = super(BlogPage, self).get_context(request)
        context['tags'] = get_all_tags()
        context['page_url'] = urllib.parse.urljoin(BASE_URL, self.url)
        return context

    def _body_text(self):
        """Return the values of all body blocks joined by single spaces.

        ``stream_data`` items are handled in both shapes: dicts carrying a
        ``'value'`` key, and tuples/lists whose second element is the value.
        Items in any other shape are ignored.
        """
        parts = []
        for block in self.body.stream_data or ():
            if isinstance(block, dict):
                if 'value' in block:
                    parts.append(str(block['value']))
            elif isinstance(block, (tuple, list)) and len(block) >= 2:
                parts.append(str(block[1]))
        # Join with a space so words from adjacent blocks do not fuse.
        return ' '.join(parts)

    def save(self, *args, **kwargs):
        """Refresh the lower-cased search copies, then save the page.

        Both copies are rebuilt unconditionally, so clearing the body also
        clears ``search_body`` instead of leaving stale text behind.
        """
        self.search_intro = (self.intro or '').lower()
        self.search_body = self._body_text().lower()
        return super().save(*args, **kwargs)

    search_fields = Page.search_fields + [
        index.SearchField('search_intro'),
        index.SearchField('search_body'),
    ]
...
search/views.py:
def search(request):
    """Search view adjusted for the lower-cased search fields (excerpt)."""
    search_query = request.GET.get('query', None)
    page = request.GET.get('page', 1)
    # Search
    if search_query:
        # The query is lower-cased because BlogPage.save() stores
        # search_intro and search_body in lower case.
        search_results = BlogPage.objects.live().search(search_query.lower())
        # The Query lookup uses the query exactly as it was submitted.
        query = Query.get(search_query)
...
答案 1 :(得分:0)
但我遇到了 save 方法被调用两次的情况。
所以应该改用下面这段代码:
def save(self, *args, **kwargs):
    """Rebuild ``search_body`` from the post body, then save the instance.

    Only blocks whose ``type`` is ``'some_header'`` or ``'some_text'``
    contribute. Their values are joined with single spaces so that words
    from adjacent blocks do not fuse together. ``search_body`` becomes an
    empty string when the stream is empty or its items are not dicts.

    Returns whatever the parent ``save()`` returns.
    """
    stream_data = self.blog_post_body.stream_data
    parts = []
    # Only the dict form of stream data is handled here.
    if stream_data and isinstance(stream_data[0], dict):
        parts = [
            str(block['value'])
            for block in stream_data
            if block.get('type', '') in ('some_header', 'some_text')
        ]
    self.search_body = ' '.join(parts)
    return super(BlogPost, self).save(*args, **kwargs)
答案 2 :(得分:0)
StreamField 使用 DjangoJSONEncoder 将数据序列化为 JSON,而该编码器默认 ensure_ascii=True,因此非 ASCII 字符会以 “\uXXXX” 转义形式写入数据库。默认的数据库搜索后端只做数据库层面的文本匹配,所以使用非 ASCII 关键字的查询无法匹配到这些转义后的内容。相关的 Wagtail 源码如下:
def get_prep_value(self, value):
    """Convert ``value`` into the string that is written to the database.

    An empty ``StreamValue`` that still carries ``raw_text`` is written back
    as that raw text; anything else is serialised to JSON with
    ``DjangoJSONEncoder``.
    """
    is_empty_stream = isinstance(value, StreamValue) and not value
    if is_empty_stream and value.raw_text is not None:
        # An empty StreamValue with a nonempty raw_text attribute should have
        # that raw_text attribute written back to the db. (This is probably
        # only useful for reverse migrations that convert StreamField data
        # back into plain text fields.)
        return value.raw_text

    prepared = self.stream_block.get_prep_value(value)
    return json.dumps(prepared, cls=DjangoJSONEncoder)
您需要继承 StreamField,并提供一个设置了 ensure_ascii=False 的自定义 JSONEncoder。不过,您还需要确保数据库默认能够处理 UTF-8 字符串(对于 PostgreSQL 应该没有问题)。
如果您切换到其他搜索后端(例如 PostgreSQL 搜索后端),它会在构建索引时从 StreamField 中提取文本(该功能由 https://github.com/wagtail/wagtail/pull/982 引入),就不会出现这个问题。