/**
 * Analyzes the given value using the given Analyzer.
 *
 * @param value Value to analyze
 * @param context The {@link AnalysisContext analysis context}.
 * @return NamedList containing the tokens produced by analyzing the given value
 */
protected NamedList<? extends Object> analyzeValue(String value, AnalysisContext context) {

  Analyzer analyzer = context.getAnalyzer();

  if (!TokenizerChain.class.isInstance(analyzer)) {

    TokenStream tokenStream = null;
    try {
      tokenStream = analyzer.reusableTokenStream(context.getFieldName(), new StringReader(value));
      tokenStream.reset();
    } catch (IOException e) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
    }
    NamedList<List<NamedList>> namedList = new NamedList<List<NamedList>>();
    namedList.add(tokenStream.getClass().getName(),
        convertTokensToNamedLists(analyzeTokenStream(tokenStream), context));
    return namedList;
  }

  TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
  CharFilterFactory[] cfiltfacs = tokenizerChain.getCharFilterFactories();
  TokenizerFactory tfac = tokenizerChain.getTokenizerFactory();
  TokenFilterFactory[] filtfacs = tokenizerChain.getTokenFilterFactories();

  NamedList<Object> namedList = new NamedList<Object>();

  if (cfiltfacs != null) {
    String source = value;
    for (CharFilterFactory cfiltfac : cfiltfacs) {
      CharStream reader = CharReader.get(new StringReader(source));
      reader = cfiltfac.create(reader);
      source = writeCharStream(namedList, reader);
    }
  }

  TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value)));
  List<AttributeSource> tokens = analyzeTokenStream(tokenStream);

  namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));

  ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokens);

  for (TokenFilterFactory tokenFilterFactory : filtfacs) {
    tokenStream = tokenFilterFactory.create(listBasedTokenStream);
    List<AttributeSource> tokenList = analyzeTokenStream(tokenStream);
    namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokenList, context));
    listBasedTokenStream = new ListBasedTokenStream(tokenList);
  }

  return namedList;
}
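// A minimal sketch (not part of the handler above) of the consume-a-TokenStream pattern that
// analyzeValue relies on via analyzeTokenStream(): reset the stream, step through it with
// incrementToken(), and snapshot each token's attribute state. The helper name
// collectTokenStates is hypothetical; it assumes java.util.ArrayList is imported alongside the
// types already used above.
private static List<AttributeSource> collectTokenStates(TokenStream tokenStream) {
  List<AttributeSource> tokens = new ArrayList<AttributeSource>();
  try {
    tokenStream.reset();
    while (tokenStream.incrementToken()) {
      // cloneAttributes() snapshots the current term, offsets, type, etc. for this token
      tokens.add(tokenStream.cloneAttributes());
    }
    tokenStream.end();
    tokenStream.close();
  } catch (IOException e) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
  }
  return tokens;
}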
/**
 * Returns a map describing the given analyzer: its class name and, if it is a TokenizerChain,
 * the class name and args of each char filter factory, the tokenizer factory, and each token
 * filter factory.
 */
private static SimpleOrderedMap<Object> getAnalyzerInfo(Analyzer analyzer) {
  SimpleOrderedMap<Object> aninfo = new SimpleOrderedMap<Object>();
  aninfo.add("className", analyzer.getClass().getName());
  if (analyzer instanceof TokenizerChain) {

    TokenizerChain tchain = (TokenizerChain) analyzer;

    CharFilterFactory[] cfiltfacs = tchain.getCharFilterFactories();
    SimpleOrderedMap<Map<String, Object>> cfilters = new SimpleOrderedMap<Map<String, Object>>();
    for (CharFilterFactory cfiltfac : cfiltfacs) {
      Map<String, Object> tok = new HashMap<String, Object>();
      String className = cfiltfac.getClass().getName();
      tok.put("className", className);
      tok.put("args", cfiltfac.getArgs());
      cfilters.add(className.substring(className.lastIndexOf('.') + 1), tok);
    }
    if (cfilters.size() > 0) {
      aninfo.add("charFilters", cfilters);
    }

    SimpleOrderedMap<Object> tokenizer = new SimpleOrderedMap<Object>();
    TokenizerFactory tfac = tchain.getTokenizerFactory();
    tokenizer.add("className", tfac.getClass().getName());
    tokenizer.add("args", tfac.getArgs());
    aninfo.add("tokenizer", tokenizer);

    TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories();
    SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<Map<String, Object>>();
    for (TokenFilterFactory filtfac : filtfacs) {
      Map<String, Object> tok = new HashMap<String, Object>();
      String className = filtfac.getClass().getName();
      tok.put("className", className);
      tok.put("args", filtfac.getArgs());
      filters.add(className.substring(className.lastIndexOf('.') + 1), tok);
    }
    if (filters.size() > 0) {
      aninfo.add("filters", filters);
    }
  }
  return aninfo;
}
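// A minimal usage sketch, not taken from the source above: look up a field's analyzers from the
// schema and describe both with getAnalyzerInfo. The schema parameter and the field name "title"
// are assumptions for illustration.
private static void printAnalyzerInfo(IndexSchema schema) {
  FieldType fieldType = schema.getFieldType("title");
  SimpleOrderedMap<Object> indexInfo = getAnalyzerInfo(fieldType.getAnalyzer());
  SimpleOrderedMap<Object> queryInfo = getAnalyzerInfo(fieldType.getQueryAnalyzer());
  // For a TokenizerChain each map holds "className" plus "charFilters", "tokenizer", and
  // "filters" entries, each listing the factory class name and its configuration args.
  System.out.println("index: " + indexInfo);
  System.out.println("query: " + queryInfo);
}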
/**
 * Returns a description of the given analyzer, by either reporting the Analyzer class name (and
 * optionally luceneMatchVersion) if it's not a TokenizerChain, or if it is, querying each
 * analysis factory for its name and args.
 */
protected static SimpleOrderedMap<Object> getAnalyzerProperties(Analyzer analyzer) {
  SimpleOrderedMap<Object> analyzerProps = new SimpleOrderedMap<>();

  if (analyzer instanceof TokenizerChain) {
    Map<String, String> factoryArgs;
    TokenizerChain tokenizerChain = (TokenizerChain) analyzer;

    CharFilterFactory[] charFilterFactories = tokenizerChain.getCharFilterFactories();
    if (0 < charFilterFactories.length) {
      List<SimpleOrderedMap<Object>> charFilterProps = new ArrayList<>();
      for (CharFilterFactory charFilterFactory : charFilterFactories) {
        SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();
        props.add(CLASS_NAME, charFilterFactory.getClassArg());
        factoryArgs = charFilterFactory.getOriginalArgs();
        if (null != factoryArgs) {
          for (String key : factoryArgs.keySet()) {
            if (!CLASS_NAME.equals(key)) {
              if (LUCENE_MATCH_VERSION_PARAM.equals(key)) {
                if (charFilterFactory.isExplicitLuceneMatchVersion()) {
                  props.add(key, factoryArgs.get(key));
                }
              } else {
                props.add(key, factoryArgs.get(key));
              }
            }
          }
        }
        charFilterProps.add(props);
      }
      analyzerProps.add(CHAR_FILTERS, charFilterProps);
    }

    SimpleOrderedMap<Object> tokenizerProps = new SimpleOrderedMap<>();
    TokenizerFactory tokenizerFactory = tokenizerChain.getTokenizerFactory();
    tokenizerProps.add(CLASS_NAME, tokenizerFactory.getClassArg());
    factoryArgs = tokenizerFactory.getOriginalArgs();
    if (null != factoryArgs) {
      for (String key : factoryArgs.keySet()) {
        if (!CLASS_NAME.equals(key)) {
          if (LUCENE_MATCH_VERSION_PARAM.equals(key)) {
            if (tokenizerFactory.isExplicitLuceneMatchVersion()) {
              tokenizerProps.add(key, factoryArgs.get(key));
            }
          } else {
            tokenizerProps.add(key, factoryArgs.get(key));
          }
        }
      }
    }
    analyzerProps.add(TOKENIZER, tokenizerProps);

    TokenFilterFactory[] filterFactories = tokenizerChain.getTokenFilterFactories();
    if (0 < filterFactories.length) {
      List<SimpleOrderedMap<Object>> filterProps = new ArrayList<>();
      for (TokenFilterFactory filterFactory : filterFactories) {
        SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();
        props.add(CLASS_NAME, filterFactory.getClassArg());
        factoryArgs = filterFactory.getOriginalArgs();
        if (null != factoryArgs) {
          for (String key : factoryArgs.keySet()) {
            if (!CLASS_NAME.equals(key)) {
              if (LUCENE_MATCH_VERSION_PARAM.equals(key)) {
                if (filterFactory.isExplicitLuceneMatchVersion()) {
                  props.add(key, factoryArgs.get(key));
                }
              } else {
                props.add(key, factoryArgs.get(key));
              }
            }
          }
        }
        filterProps.add(props);
      }
      analyzerProps.add(FILTERS, filterProps);
    }
  } else { // analyzer is not instanceof TokenizerChain
    analyzerProps.add(CLASS_NAME, analyzer.getClass().getName());
    if (analyzer.getVersion() != Version.LATEST) {
      analyzerProps.add(LUCENE_MATCH_VERSION_PARAM, analyzer.getVersion().toString());
    }
  }

  return analyzerProps;
}
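// A minimal usage sketch, not from the source above: describe a field type's index- and
// query-time analysis in the style of a Schema API response. The schema parameter, the method
// name describeFieldTypeAnalysis, and the field type name "text_general" are assumptions for
// illustration; for a TokenizerChain, each factory entry holds its class arg plus any explicitly
// configured args, grouped under the charFilters/tokenizer/filters keys built above.
private static SimpleOrderedMap<Object> describeFieldTypeAnalysis(IndexSchema schema) {
  FieldType fieldType = schema.getFieldTypeByName("text_general");
  SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();
  props.add("indexAnalyzer", getAnalyzerProperties(fieldType.getIndexAnalyzer()));
  props.add("queryAnalyzer", getAnalyzerProperties(fieldType.getQueryAnalyzer()));
  return props;
}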