  /**
   * Analyzes the given value using the given Analyzer.
   *
   * @param value   Value to analyze
   * @param context The {@link AnalysisContext analysis context}.
   *
   * @return NamedList containing the tokens produced by analyzing the given value
   */
  protected NamedList<? extends Object> analyzeValue(String value, AnalysisContext context) {

    Analyzer analyzer = context.getAnalyzer();

    // analyzers that are not a TokenizerChain are reported as a single analysis stage
    if (!TokenizerChain.class.isInstance(analyzer)) {

      TokenStream tokenStream = null;
      try {
        tokenStream = analyzer.reusableTokenStream(context.getFieldName(), new StringReader(value));
        tokenStream.reset();
      } catch (IOException e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
      }
      NamedList<List<NamedList>> namedList = new NamedList<List<NamedList>>();
      namedList.add(tokenStream.getClass().getName(),
          convertTokensToNamedLists(analyzeTokenStream(tokenStream), context));
      return namedList;
    }

    TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
    CharFilterFactory[] cfiltfacs = tokenizerChain.getCharFilterFactories();
    TokenizerFactory tfac = tokenizerChain.getTokenizerFactory();
    TokenFilterFactory[] filtfacs = tokenizerChain.getTokenFilterFactories();

    NamedList<Object> namedList = new NamedList<Object>();

    // run the value through each char filter, recording the output of every stage
    if (cfiltfacs != null) {
      String source = value;
      for (CharFilterFactory cfiltfac : cfiltfacs) {
        CharStream reader = CharReader.get(new StringReader(source));
        reader = cfiltfac.create(reader);
        source = writeCharStream(namedList, reader);
      }
    }

    // tokenize the (char-filtered) value and record the tokenizer stage
    TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value)));
    List<AttributeSource> tokens = analyzeTokenStream(tokenStream);

    namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));

    ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokens);

    // feed the tokens of each stage into the next token filter and record its output
    for (TokenFilterFactory tokenFilterFactory : filtfacs) {
      tokenStream = tokenFilterFactory.create(listBasedTokenStream);
      List<AttributeSource> tokenList = analyzeTokenStream(tokenStream);
      namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokenList, context));
      listBasedTokenStream = new ListBasedTokenStream(tokenList);
    }

    return namedList;
  }
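  /*
   * Minimal usage sketch (hypothetical, not part of the original handler): shows how a
   * caller might inspect the per-stage output of analyzeValue. It assumes a valid
   * AnalysisContext named "context" has already been built elsewhere. Each entry of the
   * returned NamedList is keyed by the TokenStream implementation class name of a stage,
   * and its value is the List<NamedList> of token details produced by that stage.
   */
  private void printAnalysisStages(String value, AnalysisContext context) {
    NamedList<? extends Object> stages = analyzeValue(value, context);
    for (int i = 0; i < stages.size(); i++) {
      // e.g. "org.apache.lucene.analysis.standard.StandardTokenizer -> [...]"
      System.out.println(stages.getName(i) + " -> " + stages.getVal(i));
    }
  }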
  /**
   * Converts the list of Tokens to a list of NamedLists representing the tokens.
   *
   * @param tokens  Tokens to convert
   * @param context The analysis context
   *
   * @return List of NamedLists containing the relevant information taken from the tokens
   */
  private List<NamedList> convertTokensToNamedLists(final List<AttributeSource> tokens, AnalysisContext context) {
    final List<NamedList> tokensNamedLists = new ArrayList<NamedList>();

    // compute the absolute position of each token from its position increment
    final int[] positions = new int[tokens.size()];
    int position = 0;

    for (int i = 0, c = tokens.size(); i < c; i++) {
      AttributeSource token = tokens.get(i);
      position += token.addAttribute(PositionIncrementAttribute.class).getPositionIncrement();
      positions[i] = position;
    }

    // sort the tokens by absolute position
    new SorterTemplate() {
      @Override
      protected void swap(int i, int j) {
        final int p = positions[i];
        positions[i] = positions[j];
        positions[j] = p;
        Collections.swap(tokens, i, j);
      }

      @Override
      protected int compare(int i, int j) {
        return positions[i] - positions[j];
      }

      @Override
      protected void setPivot(int i) {
        pivot = positions[i];
      }

      @Override
      protected int comparePivot(int j) {
        return pivot - positions[j];
      }

      private int pivot;
    }.mergeSort(0, tokens.size() - 1);

    FieldType fieldType = context.getFieldType();

    // convert each token, in position order, into a NamedList of its attributes
    final CharArr textBuf = new CharArr();
    for (int i = 0, c = tokens.size(); i < c; i++) {
      AttributeSource token = tokens.get(i);
      final NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
      final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
      BytesRef rawBytes = termAtt.getBytesRef();
      termAtt.fillBytesRef();

      textBuf.reset();
      fieldType.indexedToReadable(rawBytes, textBuf);
      final String text = textBuf.toString();

      tokenNamedList.add("text", text);

      if (token.hasAttribute(CharTermAttribute.class)) {
        final String rawText = token.getAttribute(CharTermAttribute.class).toString();
        if (!rawText.equals(text)) {
          tokenNamedList.add("raw_text", rawText);
        }
      }

      tokenNamedList.add("raw_bytes", rawBytes.toString());

      if (context.getTermsToMatch().contains(rawBytes)) {
        tokenNamedList.add("match", true);
      }

      tokenNamedList.add("position", positions[i]);

      token.reflectWith(new AttributeReflector() {
        public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
          // leave out position and bytes term
          if (TermToBytesRefAttribute.class.isAssignableFrom(attClass)) return;
          if (CharTermAttribute.class.isAssignableFrom(attClass)) return;
          if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) return;

          String k = attClass.getName() + '#' + key;

          // map keys for "standard attributes":
          if (ATTRIBUTE_MAPPING.containsKey(k)) {
            k = ATTRIBUTE_MAPPING.get(k);
          }

          if (value instanceof Payload) {
            final Payload p = (Payload) value;
            value = new BytesRef(p.getData()).toString();
          }

          tokenNamedList.add(k, value);
        }
      });

      tokensNamedLists.add(tokenNamedList);
    }

    return tokensNamedLists;
  }
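  /*
   * Illustrative helper (hypothetical, not part of the original class): summarizes the
   * entries produced by convertTokensToNamedLists above. Only keys written by that
   * method are read here: "text" and "position" are always present, "match" is present
   * only for terms contained in context.getTermsToMatch(), and any remaining keys come
   * from the attribute-reflection step.
   */
  private List<String> summarizeTokens(List<NamedList> tokenNamedLists) {
    List<String> summary = new ArrayList<String>();
    for (NamedList token : tokenNamedLists) {
      Object text = token.get("text");
      Object position = token.get("position");
      boolean matched = Boolean.TRUE.equals(token.get("match"));
      summary.add(position + ":" + text + (matched ? " (match)" : ""));
    }
    return summary;
  }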