2016-05-25 7 views
0

Für Lucene 3.6.2 ich folgende Analyzer müssen Lucene:Hafen Lucene 3.6.2 Analyzer 5.5.0

public final class StandardAnalyzerV36 extends Analyzer { 

    private Analyzer analyzer; 

    public StandardAnalyzerV36() { 
     analyzer = new StandardAnalyzer(Version.LUCENE_36); 
    } 

    public StandardAnalyzerV36(Set<?> stopWords) { 
     analyzer = new StandardAnalyzer(Version.LUCENE_36, stopWords); 
    } 

    @Override 
    public final TokenStream tokenStream(String fieldName, Reader reader) { 
     return analyzer.tokenStream(fieldName, new HTMLStripCharFilter(CharReader.get(reader))); 
    } 

    @Override 
    public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { 
     return analyzer.reusableTokenStream(fieldName, reader); 
    } 

} 

Könnten Sie bitte für Lucene 5.5.0 mich in den Hafen auf Analyzer helfen? Die Analyzer-Schnittstelle wurde in der neuen Version geändert.

AKTUALISIERT

ich diesen Analyzer neu implementiert habe folgende:

public final class StandardAnalyzerV36 extends Analyzer { 

    public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; 

    @Override 
    protected TokenStreamComponents createComponents(String fieldName) { 

     final ClassicTokenizer src = new ClassicTokenizer(); 
     TokenStream tok = new StandardFilter(src); 
     tok = new StopFilter(new LowerCaseFilter(tok), STOP_WORDS_SET); 
     return new TokenStreamComponents(src, tok); 
    } 

    @Override 
    protected Reader initReader(String fieldName, Reader reader) { 
     return new HTMLStripCharFilter(reader); 
    } 

aber meine Tests nicht auf folgenden Aufruf:

tokens = LuceneUtils.tokenizeString(analyzer, "[{(RDBMS)}]"); 

public static List<String> tokenizeString(Analyzer analyzer, String string) { 
     List<String> result = new ArrayList<String>(); 
     try { 
      TokenStream stream = analyzer.tokenStream(null, new StringReader(string)); 
      stream.reset(); 
      while (stream.incrementToken()) { 
       result.add(stream.getAttribute(CharTermAttribute.class).toString()); 
      } 
     } catch (IOException e) { 
      // not thrown b/c we're using a string reader... 
      throw new RuntimeException(e); 
     } 
     return result; 
    } 

mit folgenden Ausnahme:

java.lang.IllegalStateException: TokenStream contract violation: close() call missing 
    at org.apache.lucene.analysis.Tokenizer.setReader(Tokenizer.java:90) 
    at org.apache.lucene.analysis.Analyzer$TokenStreamComponents.setReader(Analyzer.java:315) 
    at org.apache.lucene.analysis.Analyzer.tokenStream(Analyzer.java:143) 

Was ist falsch an diesem Code?

Antwort

0

Endlich habe ich es funktioniert:

public final class StandardAnalyzerV36 extends Analyzer { 

    public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; 

    @Override 
    protected TokenStreamComponents createComponents(String fieldName) { 

     final ClassicTokenizer src = new ClassicTokenizer(); 
     TokenStream tok = new StandardFilter(src); 
     tok = new StopFilter(new LowerCaseFilter(tok), STOP_WORDS_SET); 

     return new TokenStreamComponents(src, tok); 
    } 

    @Override 
    protected Reader initReader(String fieldName, Reader reader) { 
     return new HTMLStripCharFilter(reader); 
    } 
} 

public class LuceneUtils { 

    public static List<String> tokenizeString(Analyzer analyzer, String string) { 
     List<String> result = new ArrayList<String>(); 
     TokenStream stream = null; 
     try { 
      stream = analyzer.tokenStream(null, new StringReader(string)); 
      stream.reset(); 
      while (stream.incrementToken()) { 
       result.add(stream.getAttribute(CharTermAttribute.class).toString()); 
      } 
     } catch (IOException e) { 
      // not thrown b/c we're using a string reader... 
      throw new RuntimeException(e); 
     } finally { 
      IOUtils.closeQuietly(stream); 
     } 
     return result; 
    } 
}