Xapian - 搜索波兰字符(编码问题?)

时间:2013-02-26 14:29:07

标签: internationalization xapian

我使用xapian作为我网站的搜索引擎。

最近我发现,它无法搜索到含有波兰语特有字符(如 ś、ą、ć、ę)的单词。

每当我搜索包含这些语言特有字符之一的单词时,它都不会返回任何结果。xapian 中是否有相关的编码设置?

以下是我的索引和搜索函数($document 包含内容、id 和路由字段)。

/**
 * Store a document in the Xapian database and index its text, replacing any
 * earlier version filed under the same id.
 *
 * The JSON-encoded document is kept as the Xapian document data so it can be
 * decoded again at search time. The full-text index, however, is built from
 * the raw field values rather than from the JSON string: JSON encoding
 * escapes non-ASCII characters as \uXXXX sequences by default, so indexing
 * the JSON would turn words containing characters such as "ś" or "ą" into
 * escape fragments that a UTF-8 query can never match.
 *
 * Documents indexed before this change must be re-indexed to become
 * searchable by their non-ASCII words.
 *
 * @param array $document Document fields; the 'id' entry identifies the
 *                        document. Field values are assumed to be UTF-8
 *                        (TODO confirm against the caller).
 * @param mixed $indexer  Object providing set_document() and index_text()
 *                        (presumably a XapianTermGenerator - confirm).
 * @param mixed $database Object providing replace_document()
 *                        (presumably a XapianWritableDatabase - confirm).
 *
 * @return bool Always true.
 */
protected function _indexDocument($document, $indexer, $database)
{
    $doc = new XapianDocument();
    $content = Zend_Json::encode($document);
    $doc->set_data($content);

    // Build the text to index from the unescaped field values. Anything that
    // is not a plain array keeps the previous behaviour (index the JSON).
    $text = $content;
    if (is_array($document)) {
        $parts = array();
        array_walk_recursive($document, function ($value) use (&$parts) {
            // Booleans carry no searchable text; other scalars are indexed.
            if (is_scalar($value) && !is_bool($value)) {
                $parts[] = (string) $value;
            }
        });
        $text = implode("\n", $parts);
    }

    $indexer->set_document($doc);
    $indexer->index_text($text);

    // The md5 of the id doubles as the unique term, so replace_document()
    // overwrites an existing entry instead of adding a duplicate.
    $term = md5($document['id']);
    $doc->add_boolean_term($term);
    $database->replace_document($term, $doc);
    return true;
}


/**
 * Run a full-text query against the Xapian database and return one page of
 * results together with pagination data.
 *
 * @param string $phrase Query string handed to the Xapian query parser.
 *                       Xapian works on UTF-8 text, so the phrase is
 *                       assumed to be UTF-8 (TODO confirm at the caller).
 * @param int    $page   Page number. The result offset is
 *                       ($page - 1) * $limit, clamped to 0, so pages 0 and 1
 *                       both start at the first match.
 * @param int    $limit  Maximum number of matches per page; negative values
 *                       are treated as 0.
 *
 * @return array Array with the keys:
 *               - 'results':     decoded documents passed through
 *                                _prepareDocument() together with $phrase
 *               - 'results-raw': the decoded documents as stored
 *               - 'paginator':   page, limit, pageCount, prevPage, nextPage
 *                                and documentCount (estimated match count)
 */
public function searchDocuments($phrase, $page = 0, $limit = 10)
{
    $page = (int) $page;
    // A negative page size is meaningless; treat it as "no results".
    $limit = max(0, (int) $limit);

    $database = new XapianDatabase($this->getDatabasePath());
    $enquire = new XapianEnquire($database);
    $qp = new XapianQueryParser();
    // NOTE(review): queries are always stemmed with the English stemmer;
    // presumably the index is built with the same stemmer - confirm.
    $stemmer = new XapianStem("english");
    $qp->set_stemmer($stemmer);
    $qp->set_database($database);
    $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    $query = $qp->parse_query($phrase);

    $enquire->set_query($query);
    // With the default $page = 0 the raw offset would be negative, which is
    // not a valid starting position for get_mset(); clamp it to 0.
    $offset = max(0, ($page - 1) * $limit);
    $matches = $enquire->get_mset($offset, $limit);

    $documentCount = $matches->get_matches_estimated();

    $i = $matches->begin();
    $documents = array();
    $rawDocuments = array();
    while (!$i->equals($matches->end())) {
        // Decode each hit once and reuse it for both result lists.
        $decoded = Zend_Json::decode($i->get_document()->get_data());
        $documents[] = $this->_prepareDocument($decoded, $phrase);
        $rawDocuments[] = $decoded;
        $i->next();
    }

    // Guard against division by zero when the page size is 0.
    $pageCount = $limit > 0 ? ceil($documentCount / $limit) : 0.0;
    if ($page > 0) {
        $prevPage = ($page - 1) * $limit;
    } else {
        $prevPage = 0;
    }
    if ($page < $pageCount) {
        $nextPage = ($page + 1) * $limit;
    } else {
        $nextPage = $pageCount;
    }
    $result = array('results' => $documents, 'results-raw' => $rawDocuments, 'paginator' => array(
            'page' => $page, 'limit' => $limit, 'pageCount' => $pageCount,
            'prevPage' => $prevPage, 'nextPage' => $nextPage,
            'documentCount' => $documentCount));
    return $result;
}

0 个答案:

没有答案