@Override
public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    // Field names may carry a type prefix ("type.field"); if the prefix matches
    // a registered document mapper, use that type's search analyzer.
    int dotIndex = fieldName.indexOf('.');
    if (dotIndex != -1) {
        String possibleType = fieldName.substring(0, dotIndex);
        DocumentMapper possibleDocMapper = mappers.get(possibleType);
        if (possibleDocMapper != null) {
            return possibleDocMapper.mappers().searchAnalyzer().reusableTokenStream(fieldName, reader);
        }
    }
    // Otherwise resolve by full name, then by index name, before falling back to
    // the default analyzer. (Note: this local deliberately shadows the mappers
    // field used above.)
    FieldMappers mappers = fullNameFieldMappers.get(fieldName);
    if (mappers != null && mappers.mapper() != null && mappers.mapper().searchAnalyzer() != null) {
        return mappers.mapper().searchAnalyzer().reusableTokenStream(fieldName, reader);
    }
    mappers = indexNameFieldMappers.get(fieldName);
    if (mappers != null && mappers.mapper() != null && mappers.mapper().searchAnalyzer() != null) {
        return mappers.mapper().searchAnalyzer().reusableTokenStream(fieldName, reader);
    }
    return defaultAnalyzer.reusableTokenStream(fieldName, reader);
}
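// A hypothetical caller sketch for the resolver above (mapperService, the field
// name, and the text are illustrative, not from the original code): a dotted
// name such as "tweet.message" is matched against the registered types first,
// before the full-name and index-name lookups run.
TokenStream analyze(MapperService mapperService) throws IOException {
    return mapperService.reusableTokenStream("tweet.message", new StringReader("hello world"));
}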
static List<String> getAllTermsFromText(String fieldName, String localText, Analyzer analyzer) throws IOException {
    List<String> terms = new ArrayList<String>();
    // Can't deal with null at this point. Likely returned by some FieldBridge not
    // recognizing the type.
    if (localText == null) {
        throw new SearchException("Search parameter on field " + fieldName + " could not be converted. "
                + "Are the parameter and the field of the same type? "
                + "Alternatively, apply the ignoreFieldBridge() option to pass String parameters");
    }
    Reader reader = new StringReader(localText);
    TokenStream stream = analyzer.reusableTokenStream(fieldName, reader);
    TermAttribute attribute = stream.addAttribute(TermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        if (attribute.termLength() > 0) {
            String term = attribute.term();
            terms.add(term);
        }
    }
    stream.end();
    stream.close();
    return terms;
}
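// A minimal usage sketch (assumptions: a Lucene 3.0-era classpath, where
// TermAttribute and reusableTokenStream still exist; the field name and text
// are illustrative). SimpleAnalyzer lowercases and splits on non-letters, so
// this returns [the, quick, brown, fox].
static List<String> exampleTerms() throws IOException {
    return getAllTermsFromText("title", "The Quick Brown Fox", new SimpleAnalyzer());
}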
@Test
public void testCase2() throws Exception {
    StringReader reader = new StringReader("고속도로");
    nouns.add(getToken("고속도로", 0, 4));
    nouns.add(getToken("고속도", 0, 3));
    nouns.add(getToken("고속", 0, 2));
    nouns.add(getToken("속도", 1, 3));
    nouns.add(getToken("고", 0, 1));
    Analyzer analyzer = new KoreanAnalyzer();
    TokenStream stream = analyzer.reusableTokenStream("dummy", reader);
    CharTermAttribute charTermAtt = stream.getAttribute(CharTermAttribute.class);
    OffsetAttribute offSetAtt = stream.getAttribute(OffsetAttribute.class);
    while (stream.incrementToken()) {
        TestToken t = getToken(charTermAtt.toString(), offSetAtt.startOffset(), offSetAtt.endOffset());
        System.out.println("termAtt.term() : " + charTermAtt.toString());
        System.out.println("offSetAtt : " + offSetAtt.startOffset());
        System.out.println("offSetAtt : " + offSetAtt.endOffset());
        Assert.assertTrue(nouns.contains(t));
    }
}
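// The test relies on helpers defined elsewhere in the test class; a plausible
// minimal sketch (the TestToken shape is an assumption, not the original code):
// nouns collects the expected decompositions, and getToken builds one entry.
private final List<TestToken> nouns = new ArrayList<TestToken>();

private TestToken getToken(String term, int start, int end) {
    // TestToken is assumed to implement equals()/hashCode() over
    // (term, startOffset, endOffset) so that nouns.contains(t) can match.
    return new TestToken(term, start, end);
}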
/**
 * Analyzes the given value using the given Analyzer.
 *
 * @param value Value to analyze
 * @param context The {@link AnalysisContext analysis context}.
 * @return NamedList containing the tokens produced by analyzing the given value
 */
protected NamedList<? extends Object> analyzeValue(String value, AnalysisContext context) {
    Analyzer analyzer = context.getAnalyzer();

    // Non-TokenizerChain analyzers are opaque: run them as a whole and report a
    // single token listing keyed by the analyzer's class name.
    if (!TokenizerChain.class.isInstance(analyzer)) {
        TokenStream tokenStream = null;
        try {
            tokenStream = analyzer.reusableTokenStream(context.getFieldName(), new StringReader(value));
            tokenStream.reset();
        } catch (IOException e) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
        }
        NamedList<List<NamedList>> namedList = new NamedList<List<NamedList>>();
        namedList.add(tokenStream.getClass().getName(),
                convertTokensToNamedLists(analyzeTokenStream(tokenStream), context));
        return namedList;
    }

    // A TokenizerChain can be taken apart so each stage's output is reported
    // separately: char filters first, then the tokenizer, then each token filter.
    TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
    CharFilterFactory[] cfiltfacs = tokenizerChain.getCharFilterFactories();
    TokenizerFactory tfac = tokenizerChain.getTokenizerFactory();
    TokenFilterFactory[] filtfacs = tokenizerChain.getTokenFilterFactories();

    NamedList<Object> namedList = new NamedList<Object>();

    if (cfiltfacs != null) {
        String source = value;
        for (CharFilterFactory cfiltfac : cfiltfacs) {
            CharStream reader = CharReader.get(new StringReader(source));
            reader = cfiltfac.create(reader);
            source = writeCharStream(namedList, reader);
        }
    }

    TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value)));
    List<AttributeSource> tokens = analyzeTokenStream(tokenStream);
    namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));

    // Feed each filter the previous stage's tokens through a replayable stream.
    ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokens);
    for (TokenFilterFactory tokenFilterFactory : filtfacs) {
        tokenStream = tokenFilterFactory.create(listBasedTokenStream);
        List<AttributeSource> tokenList = analyzeTokenStream(tokenStream);
        namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokenList, context));
        listBasedTokenStream = new ListBasedTokenStream(tokenList);
    }

    return namedList;
}
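// analyzeTokenStream is called above but not shown; a plausible sketch of what
// it does (drain the stream into per-token attribute snapshots), under the same
// Lucene/Solr 3.x assumptions as the method above:
private List<AttributeSource> analyzeTokenStream(TokenStream tokenStream) {
    List<AttributeSource> tokens = new ArrayList<AttributeSource>();
    try {
        while (tokenStream.incrementToken()) {
            // cloneAttributes() snapshots the current token's attribute state.
            tokens.add(tokenStream.cloneAttributes());
        }
    } catch (IOException ioe) {
        throw new RuntimeException("Error occurred while iterating over TokenStream", ioe);
    }
    return tokens;
}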
public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output,
        int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
    assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader(input)),
            output, startOffsets, endOffsets, types, posIncrements, input.length());
}
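// Example invocation (a sketch: WhitespaceAnalyzer and the expected values are
// illustrative). Whitespace tokenization of "foo bar" yields "foo" at offsets
// 0-3 and "bar" at 4-7; the default token type is "word" and each token
// advances the position by one.
public void testWhitespaceReuse() throws IOException {
    assertAnalyzesToReuse(new WhitespaceAnalyzer(), "foo bar",
            new String[] { "foo", "bar" },
            new int[] { 0, 4 },
            new int[] { 3, 7 },
            new String[] { "word", "word" },
            new int[] { 1, 1 });
}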