Der Kürze halber werde ich Lombok verwenden:
@Getter
public class Item {
private final String suggestibleText;
private final String arbitraryData; // additional arbitrary data you want to store in the index
/**
* These are data you can use for additional filtering
*/
private final Collection<String> contexts;
/*
* order suggestion results. Higher weight are returned first.
*/
private final int weight;
}
////
import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.util.BytesRef;
import java.io.UnsupportedEncodingException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
/**
 * Adapts an {@link Iterator} of {@link Item}s to Lucene's {@link InputIterator}
 * contract so a suggester can be built from it.
 *
 * <p>Uses the {@code BytesRef(CharSequence)} constructor, which encodes the text
 * as UTF-8 itself — this removes the {@code getBytes("UTF8")} calls and the
 * impossible {@code UnsupportedEncodingException} handling (which wrongly threw
 * a raw {@link Error}) from the original version.
 *
 * <p>Not thread-safe: {@code payload()}, {@code contexts()} and {@code weight()}
 * all read the item most recently returned by {@code next()}.
 */
class ItemIterator implements InputIterator {

    private final Iterator<Item> entityIterator;
    private Item currentItem; // item last returned by next(); backs payload/contexts/weight

    public ItemIterator(final Iterator<Item> entityIterator) {
        this.entityIterator = entityIterator;
    }

    /** Signals that every entry carries contexts for filtering. */
    @Override
    public boolean hasContexts() {
        return true;
    }

    /** Signals that every entry carries a payload. */
    @Override
    public boolean hasPayloads() {
        return true;
    }

    /**
     * Advances to the next item and returns its suggestible text as UTF-8 bytes,
     * or {@code null} when exhausted (the end-of-iteration marker Lucene expects).
     */
    @Override
    public BytesRef next() {
        if (!entityIterator.hasNext()) {
            return null;
        }
        currentItem = entityIterator.next();
        return new BytesRef(currentItem.getSuggestibleText()); // BytesRef encodes UTF-8
    }

    /** Payload of the current item, stored verbatim in the suggester index. */
    @Override
    public BytesRef payload() {
        return new BytesRef(currentItem.getArbitraryData());
    }

    /** Context labels of the current item, encoded as UTF-8 byte refs. */
    @Override
    public Set<BytesRef> contexts() {
        final Set<BytesRef> contexts = new HashSet<>();
        for (final String context : currentItem.getContexts()) {
            contexts.add(new BytesRef(context));
        }
        return contexts;
    }

    /** Ranking weight of the current item; higher weights are returned first. */
    @Override
    public long weight() {
        return currentItem.getWeight();
    }
}
///
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import static java.util.Arrays.asList;
/**
 * Demonstrates building an {@link AnalyzingInfixSuggester} over {@link Item}s
 * and running a context-filtered prefix lookup.
 */
public class SuggesterDemo {

    public static void main(String[] args) throws IOException {
        final RAMDirectory indexDir = new RAMDirectory();
        final WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();

        // try-with-resources: the suggester holds an index writer and must be
        // closed even when build() or lookup() throws (the original leaked it).
        try (final AnalyzingInfixSuggester suggester =
                     new AnalyzingInfixSuggester(indexDir, analyzer, analyzer, 3, true)) {

            final List<Item> entities = new ArrayList<>();
            entities.add(new Item("abacus", "", asList("ctx1", "ctx2"), 3));
            entities.add(new Item("abandonware", "", asList("ctx1"), 2));
            entities.add(new Item("abandon", "", asList("ctx1"), 4));
            entities.add(new Item("abash", "", asList("ctx1", "ctx2"), 1));
            suggester.build(new ItemIterator(entities.iterator()));

            // Restrict the lookup to entries tagged with "ctx1".
            // BytesRef(CharSequence) encodes UTF-8; no getBytes("UTF8") needed.
            final HashSet<BytesRef> contexts = new HashSet<>();
            contexts.add(new BytesRef("ctx1"));

            final List<Lookup.LookupResult> results =
                    suggester.lookup("ab", contexts, 10, true, true);
            for (final Lookup.LookupResult result : results) {
                System.out.println("weight:: " + result.value
                        + " key:: " + result.key
                        + " payload:: " + result.payload.utf8ToString());
            }
        }
    }
}
Was meinen Sie? Sie möchten der Analyse einen 'TokenFilter' hinzufügen? Das hätten Sie in den 'Analyzer' einbauen müssen. – femtoRgon
Soweit ich weiß, haben die Suggester keinen Filtermechanismus. Sie sollten die LookupResults in Ihre eigene Logik einbinden. Können Sie ein Beispiel dafür geben, welche Art von Filterung Sie verwenden möchten? Hier finden Sie eine Problemumgehung. – Dhaval