Question

这里我打算做的是根据频率加上已经存在的值对文本进行评分索引作为字段的一部分。基于以前的频率值，我没有问题得分。但是添加后一部分我有问题。以下是整体代码：

#!/usr/bin/env python
""" 
Information retrieval
For fast searching 

Based on Python 2.7.6
"""

import os, os.path

# Whoosh related
from whoosh.index import create_in
import whoosh.index as index
from whoosh.fields import *
from whoosh.filedb.filestore import FileStorage
from whoosh import scoring 


def myscore_fn(searcher, fieldname, text, matcher):
    """
    My weighting function 
    """
    # currently just taking frequency count
    freq = matcher.value_as("frequency")

    # What I want to do is to return
    #        freq + myscore
    # where myscore is the field as define in the Schema below. 
    return freq

def create_index(indexdir="whooshindex",indexname="usages"):
    """docstring for create_index"""

    if not os.path.exists(indexdir):
        os.mkdir(indexdir)

    # set (stored=True) if you want the fields to be
    # displayed later 
    schema = Schema(title=TEXT(stored=True), myscore=NUMERIC, path=ID(stored=True),
            content=TEXT(stored=True))
    ix = create_in(indexdir, schema=schema)
    writer = ix.writer()
    writer.add_document(title=u"First document", myscore=1000, path=u"/a",content=u"This is the first document we've added")
    writer.add_document(title=u"Second document", myscore=100, path=u"/b",content=u"The second, third one is even more interesting!")
    third_title = "Third document"
    third_path =  "/c"
    third_content = "This is. This is third third the third text"
    third_score = 133
    writer.add_document(title = u(third_title), path= u(third_path),
            myscore=third_score, content = u(third_content))
    writer.commit()
    return



def search(idx_dir,  kw):
    """docstring for search"""

    # Open index
    storage = FileStorage(idx_dir)
    ix = storage.open_index()

    # Searching
    from whoosh.qparser import QueryParser

    pos_weighting = scoring.FunctionWeighting(myscore_fn)

    with ix.searcher(weighting=pos_weighting) as searcher:
        # fields where you want to perform the search 
        search_field = "content"
        query = QueryParser(search_field, ix.schema).parse(kw)
        results = searcher.search(query, limit=10)
        for result in results:
            rank_score = result.score
            print result["title"], " / ",  result["content"], " (Score: ", rank_score, ")"
    return  

if __name__ == '__main__':

    idx_dirname = "whooshindex"
    idx_name = "usages"


    create_index(idx_dirname,idx_name)
    search(idx_dirname, "third")  # search terms

目前打印此结果：

This is. This is third third the third text  (Score: 3)
The second, third one is even more interesting!  (Score: 1)

我希望它返回的是：

This is. This is third third the third text  (Score: 136)
The second, third one is even more interesting!  (Score: 101)

如何通过在Whoosh中获取额外的字段值来设计自己的评分功能

0 个答案: