/**
   * Runs the analyzer of the given context over {@code value} and collects the tokens observed
   * at each stage of the analysis.
   *
   * <p>For an analyzer that is not a {@link TokenizerChain} the result holds a single entry,
   * keyed by the class name of the analyzer's token stream. For a {@link TokenizerChain} the
   * stages are reported separately and in order: every char filter is handed to
   * {@code writeCharStream} together with the result list, then one entry is added for the
   * tokenizer, then one entry per token filter. Each entry is keyed by the class name of the
   * stream that produced it.
   *
   * @param value Value to analyze
   * @param context The {@link AnalysisContext analysis context} supplying the analyzer and the
   *     field name.
   * @return NamedList containing the tokens produced by analyzing the given value
   * @throws SolrException with {@code BAD_REQUEST} if obtaining or resetting the token stream of
   *     a non-{@link TokenizerChain} analyzer fails with an {@link IOException}
   */
  protected NamedList<? extends Object> analyzeValue(String value, AnalysisContext context) {

    Analyzer analyzer = context.getAnalyzer();

    if (analyzer instanceof TokenizerChain) {
      TokenizerChain chain = (TokenizerChain) analyzer;
      CharFilterFactory[] charFilterFactories = chain.getCharFilterFactories();
      TokenizerFactory tokenizerFactory = chain.getTokenizerFactory();
      TokenFilterFactory[] tokenFilterFactories = chain.getTokenFilterFactories();

      NamedList<Object> stages = new NamedList<Object>();

      if (charFilterFactories != null) {
        // Each char filter reads the text returned by writeCharStream for the previous one.
        String filteredText = value;
        for (int i = 0; i < charFilterFactories.length; i++) {
          CharStream filtered =
              charFilterFactories[i].create(CharReader.get(new StringReader(filteredText)));
          filteredText = writeCharStream(stages, filtered);
        }
      }

      // The tokenizer is fed the original value through the chain's own charStream(...).
      TokenStream stageStream = tokenizerFactory.create(chain.charStream(new StringReader(value)));
      List<AttributeSource> tokenizerTokens = analyzeTokenStream(stageStream);
      String tokenizerName = stageStream.getClass().getName();
      stages.add(tokenizerName, convertTokensToNamedLists(tokenizerTokens, context));

      // Every token filter consumes a replay of the tokens captured from the stage before it.
      ListBasedTokenStream replay = new ListBasedTokenStream(tokenizerTokens);
      for (int i = 0; i < tokenFilterFactories.length; i++) {
        stageStream = tokenFilterFactories[i].create(replay);
        List<AttributeSource> filteredTokens = analyzeTokenStream(stageStream);
        String filterName = stageStream.getClass().getName();
        stages.add(filterName, convertTokensToNamedLists(filteredTokens, context));
        replay = new ListBasedTokenStream(filteredTokens);
      }

      return stages;
    }

    // Any other analyzer is treated as a single opaque stage.
    TokenStream wholeStream;
    try {
      wholeStream = analyzer.reusableTokenStream(context.getFieldName(), new StringReader(value));
      wholeStream.reset();
    } catch (IOException ioe) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ioe);
    }

    NamedList<List<NamedList>> singleStage = new NamedList<List<NamedList>>();
    String streamName = wholeStream.getClass().getName();
    singleStage.add(streamName, convertTokensToNamedLists(analyzeTokenStream(wholeStream), context));
    return singleStage;
  }
  /**
   * Builds a description of the given analyzer.
   *
   * <p>The result always starts with a {@code "className"} entry holding the analyzer's class
   * name. When the analyzer is a {@link TokenizerChain}, it additionally contains:
   *
   * <ul>
   *   <li>{@code "charFilters"} - only if the chain has at least one char filter; one entry per
   *       factory, keyed by the factory's simple class name, holding its {@code "className"} and
   *       {@code "args"};
   *   <li>{@code "tokenizer"} - the tokenizer factory's {@code "className"} and {@code "args"};
   *   <li>{@code "filters"} - only if the chain has at least one token filter; same layout as
   *       {@code "charFilters"}.
   * </ul>
   *
   * @param analyzer the analyzer to describe
   * @return an ordered map describing the analyzer
   */
  private static SimpleOrderedMap<Object> getAnalyzerInfo(Analyzer analyzer) {
    SimpleOrderedMap<Object> aninfo = new SimpleOrderedMap<Object>();
    aninfo.add("className", analyzer.getClass().getName());
    if (!(analyzer instanceof TokenizerChain)) {
      return aninfo;
    }

    TokenizerChain tchain = (TokenizerChain) analyzer;

    CharFilterFactory[] cfiltfacs = tchain.getCharFilterFactories();
    SimpleOrderedMap<Map<String, Object>> cfilters = new SimpleOrderedMap<Map<String, Object>>();
    // A chain without char filters may hand back null instead of an empty array; treat that
    // the same as "no char filters" rather than failing with a NullPointerException.
    if (cfiltfacs != null) {
      for (CharFilterFactory cfiltfac : cfiltfacs) {
        Map<String, Object> tok = new HashMap<String, Object>();
        String className = cfiltfac.getClass().getName();
        tok.put("className", className);
        tok.put("args", cfiltfac.getArgs());
        // Key by the simple class name (text after the last '.').
        cfilters.add(className.substring(className.lastIndexOf('.') + 1), tok);
      }
    }
    if (cfilters.size() > 0) {
      aninfo.add("charFilters", cfilters);
    }

    SimpleOrderedMap<Object> tokenizer = new SimpleOrderedMap<Object>();
    TokenizerFactory tfac = tchain.getTokenizerFactory();
    tokenizer.add("className", tfac.getClass().getName());
    tokenizer.add("args", tfac.getArgs());
    aninfo.add("tokenizer", tokenizer);

    TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories();
    SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<Map<String, Object>>();
    for (TokenFilterFactory filtfac : filtfacs) {
      Map<String, Object> tok = new HashMap<String, Object>();
      String className = filtfac.getClass().getName();
      tok.put("className", className);
      tok.put("args", filtfac.getArgs());
      // Key by the simple class name (text after the last '.').
      filters.add(className.substring(className.lastIndexOf('.') + 1), tok);
    }
    if (filters.size() > 0) {
      aninfo.add("filters", filters);
    }
    return aninfo;
  }
Example #3
0
  /**
   * Returns a description of the given analyzer, by either reporting the Analyzer class name (and
   * optionally luceneMatchVersion) if it's not a TokenizerChain, or if it is, querying each
   * analysis factory for its name and args.
   *
   * <p>For a {@link TokenizerChain} the result contains a {@code CHAR_FILTERS} list (only when
   * the chain has char filters), a {@code TOKENIZER} entry, and a {@code FILTERS} list (only when
   * the chain has token filters). Each factory is described by its class argument followed by
   * its original args, filtered as described on
   * {@link #addFactoryArgs(SimpleOrderedMap, Map, boolean)}.
   *
   * @param analyzer the analyzer to describe
   * @return an ordered map of the analyzer's properties
   */
  protected static SimpleOrderedMap<Object> getAnalyzerProperties(Analyzer analyzer) {
    SimpleOrderedMap<Object> analyzerProps = new SimpleOrderedMap<>();

    if (!(analyzer instanceof TokenizerChain)) {
      analyzerProps.add(CLASS_NAME, analyzer.getClass().getName());
      // Deliberately an identity comparison: only the shared LATEST constant itself suppresses
      // the version entry.
      if (analyzer.getVersion() != Version.LATEST) {
        analyzerProps.add(LUCENE_MATCH_VERSION_PARAM, analyzer.getVersion().toString());
      }
      return analyzerProps;
    }

    TokenizerChain tokenizerChain = (TokenizerChain) analyzer;

    CharFilterFactory[] charFilterFactories = tokenizerChain.getCharFilterFactories();
    if (0 < charFilterFactories.length) {
      List<SimpleOrderedMap<Object>> charFilterProps = new ArrayList<>();
      for (CharFilterFactory charFilterFactory : charFilterFactories) {
        SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();
        props.add(CLASS_NAME, charFilterFactory.getClassArg());
        addFactoryArgs(
            props,
            charFilterFactory.getOriginalArgs(),
            charFilterFactory.isExplicitLuceneMatchVersion());
        charFilterProps.add(props);
      }
      analyzerProps.add(CHAR_FILTERS, charFilterProps);
    }

    SimpleOrderedMap<Object> tokenizerProps = new SimpleOrderedMap<>();
    TokenizerFactory tokenizerFactory = tokenizerChain.getTokenizerFactory();
    tokenizerProps.add(CLASS_NAME, tokenizerFactory.getClassArg());
    addFactoryArgs(
        tokenizerProps,
        tokenizerFactory.getOriginalArgs(),
        tokenizerFactory.isExplicitLuceneMatchVersion());
    analyzerProps.add(TOKENIZER, tokenizerProps);

    TokenFilterFactory[] filterFactories = tokenizerChain.getTokenFilterFactories();
    if (0 < filterFactories.length) {
      List<SimpleOrderedMap<Object>> filterProps = new ArrayList<>();
      for (TokenFilterFactory filterFactory : filterFactories) {
        SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();
        props.add(CLASS_NAME, filterFactory.getClassArg());
        addFactoryArgs(
            props, filterFactory.getOriginalArgs(), filterFactory.isExplicitLuceneMatchVersion());
        filterProps.add(props);
      }
      analyzerProps.add(FILTERS, filterProps);
    }

    return analyzerProps;
  }

  /**
   * Copies a factory's original args into {@code props}, in the map's iteration order.
   *
   * <p>The {@code CLASS_NAME} key is always skipped (the caller has already added the class
   * argument), and the {@code LUCENE_MATCH_VERSION_PARAM} key is copied only when
   * {@code explicitLuceneMatchVersion} is {@code true}. A {@code null} map adds nothing.
   *
   * @param props destination for the copied args
   * @param factoryArgs the factory's original args, may be {@code null}
   * @param explicitLuceneMatchVersion the factory's {@code isExplicitLuceneMatchVersion()} value;
   *     read once by the caller, which assumes it is a side-effect-free accessor
   */
  private static void addFactoryArgs(
      SimpleOrderedMap<Object> props,
      Map<String, String> factoryArgs,
      boolean explicitLuceneMatchVersion) {
    if (null == factoryArgs) {
      return;
    }
    for (Map.Entry<String, String> arg : factoryArgs.entrySet()) {
      String key = arg.getKey();
      if (CLASS_NAME.equals(key)) {
        continue;
      }
      if (LUCENE_MATCH_VERSION_PARAM.equals(key) && !explicitLuceneMatchVersion) {
        continue;
      }
      props.add(key, arg.getValue());
    }
  }