Lucene建议服务

时间:2016-04-07 07:34:08

标签: lucene

我们正在尝试使用 Lucene 的建议服务(AnalyzingSuggester)来实现自动补全,并希望对返回结果应用自定义过滤器。但似乎 AnalyzingSuggester 和 AnalyzingInfixSuggester 都不会应用过滤器。

非常感谢任何建议。

1 个答案:

答案 0 :(得分:0)

为了简洁,我将使用 Lombok:

/**
 * Immutable value object describing one entry to feed into the suggester.
 *
 * <p>Accessors are generated by Lombok's {@code @Getter}. The constructor is
 * written out explicitly because {@code @Getter} alone does not generate one,
 * and without it the {@code final} fields could never be initialized.
 */
@Getter
public class Item {
    /** Text that is offered as the suggestion itself. */
    private final String suggestibleText;
    /** Additional arbitrary data you want to store in the index. */
    private final String arbitraryData;
    /** Data you can use for additional filtering. */
    private final Collection<String> contexts;
    /** Orders suggestion results; higher weights are returned first. */
    private final int weight;

    /**
     * Creates an item.
     *
     * @param suggestibleText text that is offered as the suggestion
     * @param arbitraryData   additional arbitrary data to store alongside the suggestion
     * @param contexts        values that can be used for additional filtering
     * @param weight          ranking weight; higher weights are returned first
     */
    public Item(final String suggestibleText,
                final String arbitraryData,
                final Collection<String> contexts,
                final int weight) {
        this.suggestibleText = suggestibleText;
        this.arbitraryData = arbitraryData;
        this.contexts = contexts;
        this.weight = weight;
    }
}

////

import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.util.BytesRef;

import java.io.UnsupportedEncodingException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

/**
 * Adapts an {@link Iterator} of {@link Item}s to the {@link InputIterator}
 * interface so the items can be fed to a suggester.
 *
 * <p>{@link #payload()}, {@link #contexts()} and {@link #weight()} all describe
 * the item most recently returned by {@link #next()}, so they must only be
 * called after {@code next()} has returned a non-null value.
 */
class ItemIterator implements InputIterator {

    private final Iterator<Item> entityIterator;
    /** Item most recently produced by {@link #next()}; {@code null} before the first call. */
    private Item currentItem;

    /**
     * @param entityIterator source of the items to expose
     */
    public ItemIterator(final Iterator<Item> entityIterator) {
        this.entityIterator = entityIterator;
    }

    /** This iterator always reports that it supplies contexts. */
    @Override
    public boolean hasContexts() {
        return true;
    }

    /** This iterator always reports that it supplies payloads. */
    @Override
    public boolean hasPayloads() {
        return true;
    }

    /**
     * Advances to the next item and returns its suggestible text.
     *
     * @return the UTF-8 encoded suggestible text, or {@code null} once the
     *         underlying iterator is exhausted
     */
    @Override
    public BytesRef next() {
        if (!entityIterator.hasNext()) {
            // returning null is fine for lucene: it marks the end of the input
            return null;
        }
        currentItem = entityIterator.next();
        // BytesRef(CharSequence) encodes as UTF-8 itself, so no charset lookup
        // (and no UnsupportedEncodingException handling) is needed.
        return new BytesRef(currentItem.getSuggestibleText());
    }

    /**
     * @return the current item's arbitrary data as UTF-8 bytes, or {@code null}
     *         if the item carries no payload
     */
    @Override
    public BytesRef payload() {
        final String data = currentItem.getArbitraryData();
        return data == null ? null : new BytesRef(data);
    }

    /**
     * @return the current item's contexts as UTF-8 encoded byte references, or
     *         {@code null} if the item carries no contexts
     */
    @Override
    public Set<BytesRef> contexts() {
        final Iterable<String> itemContexts = currentItem.getContexts();
        if (itemContexts == null) {
            return null;
        }
        final Set<BytesRef> encoded = new HashSet<>();
        for (final String context : itemContexts) {
            encoded.add(new BytesRef(context));
        }
        return encoded;
    }

    /**
     * @return the current item's weight
     */
    @Override
    public long weight() {
        return currentItem.getWeight();
    }
}

///

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;

import static java.util.Arrays.asList;

/**
 * Small end-to-end demo: builds an in-memory {@link AnalyzingInfixSuggester}
 * from a handful of {@link Item}s and prints the suggestions for the prefix
 * {@code "ab"} restricted to the context {@code "ctx1"}.
 */
public class SuggesterDemo {

    /**
     * Builds the suggester, runs one context-filtered lookup and prints each result.
     *
     * @param args unused
     * @throws IOException if building the index or running the lookup fails
     */
    public static void main(String[] args) throws IOException {
        final RAMDirectory indexDir = new RAMDirectory();
        final WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();

        final List<Item> entities = new ArrayList<>();
        entities.add(new Item("abacus", "", asList("ctx1", "ctx2"), 3));
        entities.add(new Item("abandonware", "", asList("ctx1"), 2));
        entities.add(new Item("abandon", "", asList("ctx1"), 4));
        entities.add(new Item("abash", "", asList("ctx1", "ctx2"), 1));

        // try-with-resources guarantees the suggester is closed even when
        // build() or lookup() throws.
        try (AnalyzingInfixSuggester suggester =
                     new AnalyzingInfixSuggester(indexDir, analyzer, analyzer, 3, true)) {

            suggester.build(new ItemIterator(entities.iterator()));

            // lookup: only suggestions tagged with "ctx1" should be returned
            final HashSet<BytesRef> contexts = new HashSet<>();
            contexts.add(new BytesRef("ctx1"));
            final List<Lookup.LookupResult> results = suggester.lookup("ab", contexts, 10, true, true);

            for (final Lookup.LookupResult result : results) {
                // A result may carry no payload; avoid dereferencing null.
                final String payloadText = result.payload == null ? null : result.payload.utf8ToString();
                System.out.println("weight:: " + result.value + " key:: " + result.key + " payload:: " + payloadText);
            }
        }
    }
}