用空格过滤Xapian

时间:2013-09-21 12:36:14

标签: c++ xapian

如何使用包含空格的查询来过滤搜索结果?

#include <cstdlib>
#include <iostream>
#include <string>

#include <xapian.h>
/// Plain record describing one page handed to the indexer.
/// Member order is part of the type's aggregate layout and must not change.
struct document {
    std::string title;    ///< Page title; also stored as the Xapian document data.
    std::string content;  ///< Body text of the page.
    std::string url;      ///< Page address; identifies the record when it is indexed.
};

void indexData(document d) {
    try {
        Xapian::WritableDatabase db("/Users/ramesh/Desktop/xapian", Xapian::DB_CREATE_OR_OPEN);
        Xapian::TermGenerator indexer;
        Xapian::Stem stemmer("english");
        indexer.set_stemmer(stemmer);
        Xapian::Document doc;
        doc.set_data(d.title);
        indexer.set_document(doc);
        indexer.index_text(d.title,1,"title");
        indexer.index_text(d.content,1,"content");
        indexer.index_text(d.url,1,"url");
        doc.add_boolean_term("title"+d.title);
        db.replace_document(d.url,doc);
        db.commit();
    } catch (const Xapian::Error &e) {
        std::cout << e.get_description() << std::endl;
        exit(1);
    }
}

void searchData(std::string query_string){
    try{
        Xapian::Database db("/Users/ramesh/Desktop/xapian");
        Xapian::Enquire enquire(db);
        Xapian::QueryParser qp;
        Xapian::Stem stemmer("english");
        qp.set_default_op(Xapian::Query::OP_FILTER);
        qp.set_stemmer(stemmer);
        qp.add_prefix("","title");
        qp.add_prefix("","content");
        qp.add_boolean_prefix("title","title");
        qp.set_database(db);
        qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
        Xapian::Query query = qp.parse_query(query_string);
        std::cout << "Parsed query is: " << query.get_description() << std::endl;
        enquire.set_query(query);
        Xapian::MSet matches = enquire.get_mset(0, 10);
        std::cout << matches.get_matches_estimated() << " results found.\n";
        std::cout << "Matches 1-" << matches.size() << ":\n" << std::endl;
        for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) {
            std::cout << i.get_rank() + 1 << ": " << i.get_weight() << " docid=" << *i
                 << " [" << i.get_document().get_data() << "]\n\n";
        }
    } catch (const Xapian::Error &e) {
        std::cout << e.get_description() << std::endl;
        exit(1);
    }

}

int main()
{
    document d1,d2;
    d1.title = "Xapain is good";
    d1.content = "Xapian is an open source search engine library, which allows developers to add advanced indexing and search facilities to their own applications.";
    d1.url = "http://www.xapian.org";
    d2.title = "Xapain is awesome";
    d2.content = "good Xapian is an open source search engine library, which allows developers to add advanced indexing and search facilities to their own applications.";
    d2.url = "http://www.xapian.org/test";
    indexData(d1);
    indexData(d2);
    searchData("xapian title:good");
    searchData("xapian title:Xapian is good");
    return 0;
}

第一个查询“xapian title:good”工作正常。

但是第二个查询“xapian title:Xapian is good”失败了。

有人能解释一下问题出在哪里吗?

2 个答案:

答案 0 :(得分:0)

您将标题定义为:

d1.title = "Xapain is good";

,您的搜索查询是:

searchData("xapian title:Xapian is good");

问题会不会是你把“Xapian”误拼成了“Xapain”?

答案 1 :(得分:-1)

在searchData()方法中,您应该包含

document d;

qp.add_prefix(d.title,"title");
qp.add_prefix(d.content,"content");

而不是

qp.add_prefix("","title");
qp.add_prefix("","content");