
We are trying to use Lucene's suggest service (AnalyzingSuggester) for autocomplete and would like to apply custom filters to the results. It seems that neither AnalyzingSuggester nor AnalyzingInfixSuggester lets you apply filters.

Any input would be really appreciated.


What do you mean? You want to add a 'TokenFilter' to the analysis? You would have to build that into the 'Analyzer'. – femtoRgon
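If a 'TokenFilter' is what you are after, a minimal sketch of what that comment means, assuming a Lucene 5.x package layout and using a lower-casing filter as a stand-in for whatever filter you have in mind ('FilteringAnalyzer' is just an illustrative name):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;

// Analyzer that wires a TokenFilter (here LowerCaseFilter) behind the tokenizer;
// pass an instance of this to the suggester instead of a bare WhitespaceAnalyzer.
public class FilteringAnalyzer extends Analyzer {

    @Override
    protected TokenStreamComponents createComponents(final String fieldName) {
        final Tokenizer tokenizer = new WhitespaceTokenizer();
        final TokenStream filtered = new LowerCaseFilter(tokenizer);
        return new TokenStreamComponents(tokenizer, filtered);
    }
}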


As far as I know, the suggesters have no filtering mechanism. You should wrap the LookupResults in your own logic. Could you give an example of what kind of filtering you want to apply? Here is a workaround. – Dhaval
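One way to read that workaround, sketched below: run the lookup without any filter and then apply your own predicate to the returned LookupResults in application code. The payload-based condition in the usage comment is only an illustrative assumption.

import org.apache.lucene.search.suggest.Lookup;

import java.util.ArrayList;
import java.util.List;
import java.util.function.Predicate;

// Post-filters suggester output in application code, since the suggesters
// themselves expose no filter hook beyond lookup contexts.
public final class LookupResultFilter {

    // Keeps only the results the given predicate accepts.
    public static List<Lookup.LookupResult> filter(final List<Lookup.LookupResult> results,
                                                   final Predicate<Lookup.LookupResult> keep) {
        final List<Lookup.LookupResult> kept = new ArrayList<>();
        for (final Lookup.LookupResult result : results) {
            if (keep.test(result)) {
                kept.add(result);
            }
        }
        return kept;
    }
}

// Example use (the payload prefix check is purely illustrative):
// List<Lookup.LookupResult> filtered =
//     LookupResultFilter.filter(results, r -> r.payload.utf8ToString().startsWith("somePrefix"));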

Answer


I will use Lombok for brevity:

import lombok.AllArgsConstructor;
import lombok.Getter;

import java.util.Collection;

@AllArgsConstructor // generates the constructor used in the demo below
@Getter
public class Item {
    private final String suggestibleText;
    private final String arbitraryData; // additional arbitrary data you want to store in the index
    /**
     * Data you can use for additional filtering (lookup contexts).
     */
    private final Collection<String> contexts;
    /**
     * Orders suggestion results; higher weights are returned first.
     */
    private final int weight;
}

////

import org.apache.lucene.search.suggest.InputIterator; 
import org.apache.lucene.util.BytesRef; 

import java.io.UnsupportedEncodingException; 
import java.util.HashSet; 
import java.util.Iterator; 
import java.util.Set; 

class ItemIterator implements InputIterator { 

    private final Iterator<Item> entityIterator; 
    private Item currentItem; 

    public ItemIterator(final Iterator<Item> entityIterator) { 
     this.entityIterator = entityIterator; 
    } 

    @Override 
    public boolean hasContexts() { 
     return true; 
    } 

    @Override 
    public boolean hasPayloads() { 
     return true; 
    } 

    @Override 
    public BytesRef next() { 
     if (entityIterator.hasNext()) { 
      currentItem = entityIterator.next(); 
      try { 
       return new BytesRef(currentItem.getSuggestibleText().getBytes("UTF8")); 
      } catch (final UnsupportedEncodingException e) { 
       throw new Error("Couldn't convert to UTF-8"); 
      } 
     } else { // returning null tells Lucene there are no more entries 
      return null; 
     } 
    } 

    @Override 
    public BytesRef payload() { // the arbitrary data stored alongside the current suggestion 
     try { 
      return new BytesRef(currentItem.getArbitraryData().getBytes("UTF8")); 
     } catch (final UnsupportedEncodingException e) { 
      throw new Error("Could not convert to UTF-8"); 
     } 
    } 

    @Override 
    public Set<BytesRef> contexts() { // contexts the suggestion can be filtered on at lookup time 
     try { 
      final Set<BytesRef> contexts = new HashSet<>(); 
      for (final String context : currentItem.getContexts()) { 
       contexts.add(new BytesRef(context.getBytes("UTF8"))); 
      } 
      return contexts; 
     } catch (final UnsupportedEncodingException e) { 
      throw new Error("Couldn't convert to UTF-8"); 
     } 
    } 

    @Override 
    public long weight() { 
     return currentItem.getWeight(); 
    } 
} 

///

import org.apache.lucene.analysis.core.WhitespaceAnalyzer; 
import org.apache.lucene.search.suggest.Lookup; 
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester; 
import org.apache.lucene.store.RAMDirectory; 
import org.apache.lucene.util.BytesRef; 
import java.io.IOException; 
import java.util.ArrayList; 
import java.util.HashSet; 
import java.util.List; 

import static java.util.Arrays.asList; 

public class SuggesterDemo { 

    public static void main(String[] args) throws IOException { 
     final RAMDirectory indexDir = new RAMDirectory(); 
     final WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(); 

     final AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(indexDir, analyzer, analyzer, 3, true); 

     final List<Item> entities = new ArrayList<>(); 

     entities.add(new Item("abacus", "", asList("ctx1", "ctx2"), 3)); 
     entities.add(new Item("abandonware", "", asList("ctx1"), 2)); 
     entities.add(new Item("abandon", "", asList("ctx1"), 4)); 
     entities.add(new Item("abash", "", asList("ctx1", "ctx2"), 1)); 

     suggester.build(new ItemIterator(entities.iterator())); 

     // lookup 

     List<Lookup.LookupResult> results; 
     final HashSet<BytesRef> contexts = new HashSet<>(); 
     contexts.add(new BytesRef("ctx1".getBytes("UTF8"))); 
     results = suggester.lookup("ab", contexts, 10, true, true); 

     for (final Lookup.LookupResult result : results) { 
      System.out.println("weight:: " + result.value + " key:: " + result.key + " payload:: " + result.payload.utf8ToString()); 
     } 
     suggester.close(); 
    } 
}