    @Override
    public final TokenStream reusableTokenStream(String fieldName, Reader reader)
        throws IOException {
      // A field name prefixed with a type ("type.field") is resolved against that
      // type's document mapper first.
      int dotIndex = fieldName.indexOf('.');
      if (dotIndex != -1) {
        String possibleType = fieldName.substring(0, dotIndex);
        DocumentMapper possibleDocMapper = mappers.get(possibleType);
        if (possibleDocMapper != null) {
          return possibleDocMapper
              .mappers()
              .searchAnalyzer()
              .reusableTokenStream(fieldName, reader);
        }
      }
      // Otherwise fall back to the mapper registered under the full field name,
      // then the index name, and finally the default analyzer.
      FieldMappers mappers = fullNameFieldMappers.get(fieldName);
      if (mappers != null
          && mappers.mapper() != null
          && mappers.mapper().searchAnalyzer() != null) {
        return mappers.mapper().searchAnalyzer().reusableTokenStream(fieldName, reader);
      }

      mappers = indexNameFieldMappers.get(fieldName);
      if (mappers != null
          && mappers.mapper() != null
          && mappers.mapper().searchAnalyzer() != null) {
        return mappers.mapper().searchAnalyzer().reusableTokenStream(fieldName, reader);
      }
      return defaultAnalyzer.reusableTokenStream(fieldName, reader);
    }
Example #2
  static List<String> getAllTermsFromText(String fieldName, String localText, Analyzer analyzer)
      throws IOException {
    List<String> terms = new ArrayList<String>();

    // Can't deal with null at this point. Likely returned by some FieldBridge not recognizing the
    // type.
    if (localText == null) {
      throw new SearchException(
          "Search parameter on field "
              + fieldName
              + " could not be converted. "
              + "Are the parameter and the field of the same type?"
              + "Alternatively, apply the ignoreFieldBridge() option to "
              + "pass String parameters");
    }
    Reader reader = new StringReader(localText);
    TokenStream stream = analyzer.reusableTokenStream(fieldName, reader);
    TermAttribute attribute = stream.addAttribute(TermAttribute.class);
    stream.reset();

    while (stream.incrementToken()) {
      if (attribute.termLength() > 0) {
        String term = attribute.term();
        terms.add(term);
      }
    }
    stream.end();
    stream.close();
    return terms;
  }
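A minimal usage sketch for the helper above (not from the original source). It assumes Lucene 3.x, where reusableTokenStream still exists, and that getAllTermsFromText is declared in or statically imported into the calling class; TermsFromTextDemo is a made-up name:

import java.io.IOException;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;

// Assumes getAllTermsFromText(...) from the snippet above is declared in this class.
public class TermsFromTextDemo {
  public static void main(String[] args) throws IOException {
    // StandardAnalyzer lower-cases tokens and removes English stop words,
    // so "The" is dropped from the output.
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    List<String> terms = getAllTermsFromText("title", "The Quick Brown Fox", analyzer);
    System.out.println(terms); // expected: [quick, brown, fox]
  }
}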
Example #3
  @Test
  public void testCase2() throws Exception {
    // Input "고속도로" ("expressway"); the analyzer is expected to emit these
    // decompounded nouns with their character offsets.
    StringReader reader = new StringReader("고속도로");

    nouns.add(getToken("고속도로", 0, 4));
    nouns.add(getToken("고속도", 0, 3));
    nouns.add(getToken("고속", 0, 2));
    nouns.add(getToken("속도", 1, 3));
    nouns.add(getToken("고", 0, 1));

    Analyzer analyzer = new KoreanAnalyzer();
    TokenStream stream = analyzer.reusableTokenStream("dummy", reader);

    CharTermAttribute charTermAtt = stream.getAttribute(CharTermAttribute.class);
    OffsetAttribute offSetAtt = stream.getAttribute(OffsetAttribute.class);

    while (stream.incrementToken()) {
      TestToken t =
          getToken(charTermAtt.toString(), offSetAtt.startOffset(), offSetAtt.endOffset());
      System.out.println("termAtt.term() : " + charTermAtt.toString());
      System.out.println("offSetAtt : " + offSetAtt.startOffset());
      System.out.println("offSetAtt : " + offSetAtt.endOffset());

      Assert.assertTrue(nouns.contains(t));
    }
  }
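The nouns list, the getToken(...) helper, and the TestToken type used above come from elsewhere in the test class. Below is one plausible sketch of those members, assuming a simple value object with equality on the term text and offsets (the names and fields are assumptions, not the original source):

  private final List<TestToken> nouns = new ArrayList<TestToken>();

  private TestToken getToken(String term, int start, int end) {
    return new TestToken(term, start, end);
  }

  // Simple value object so nouns.contains(t) compares term text and offsets.
  private static class TestToken {
    final String term;
    final int startOffset;
    final int endOffset;

    TestToken(String term, int startOffset, int endOffset) {
      this.term = term;
      this.startOffset = startOffset;
      this.endOffset = endOffset;
    }

    @Override
    public boolean equals(Object other) {
      if (!(other instanceof TestToken)) {
        return false;
      }
      TestToken t = (TestToken) other;
      return term.equals(t.term) && startOffset == t.startOffset && endOffset == t.endOffset;
    }

    @Override
    public int hashCode() {
      return 31 * (31 * term.hashCode() + startOffset) + endOffset;
    }
  }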
Example #4
  /**
   * Analyzes the given value using the given Analyzer.
   *
   * @param value Value to analyze
   * @param context The {@link AnalysisContext analysis context}.
   * @return NamedList containing the tokens produced by analyzing the given value
   */
  protected NamedList<? extends Object> analyzeValue(String value, AnalysisContext context) {

    Analyzer analyzer = context.getAnalyzer();

    // Not a Solr TokenizerChain: analyze the value with the analyzer as a whole.
    if (!TokenizerChain.class.isInstance(analyzer)) {

      TokenStream tokenStream = null;
      try {
        tokenStream = analyzer.reusableTokenStream(context.getFieldName(), new StringReader(value));
        tokenStream.reset();
      } catch (IOException e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
      }
      NamedList<List<NamedList>> namedList = new NamedList<List<NamedList>>();
      namedList.add(
          tokenStream.getClass().getName(),
          convertTokensToNamedLists(analyzeTokenStream(tokenStream), context));
      return namedList;
    }

    // A TokenizerChain is broken apart so each char filter, the tokenizer, and
    // each token filter can be reported as a separate analysis stage.
    TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
    CharFilterFactory[] cfiltfacs = tokenizerChain.getCharFilterFactories();
    TokenizerFactory tfac = tokenizerChain.getTokenizerFactory();
    TokenFilterFactory[] filtfacs = tokenizerChain.getTokenFilterFactories();

    NamedList<Object> namedList = new NamedList<Object>();

    if (cfiltfacs != null) {
      String source = value;
      for (CharFilterFactory cfiltfac : cfiltfacs) {
        CharStream reader = CharReader.get(new StringReader(source));
        reader = cfiltfac.create(reader);
        source = writeCharStream(namedList, reader);
      }
    }

    TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value)));
    List<AttributeSource> tokens = analyzeTokenStream(tokenStream);

    namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));

    ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokens);

    for (TokenFilterFactory tokenFilterFactory : filtfacs) {
      tokenStream = tokenFilterFactory.create(listBasedTokenStream);
      List<AttributeSource> tokenList = analyzeTokenStream(tokenStream);
      namedList.add(
          tokenStream.getClass().getName(), convertTokensToNamedLists(tokenList, context));
      listBasedTokenStream = new ListBasedTokenStream(tokenList);
    }

    return namedList;
  }
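analyzeTokenStream(...) and ListBasedTokenStream are Solr helpers that are not shown here. The heart of the former is the standard reset / incrementToken / end / close loop that captures one cloned attribute state per token; a simplified stand-in under that assumption (not Solr's actual implementation) could look like this:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.AttributeSource;

final class TokenStateCapture {
  // Collects a cloned AttributeSource per token so the states can later be
  // replayed (which is what a ListBasedTokenStream-style class does).
  static List<AttributeSource> capture(TokenStream stream) throws IOException {
    List<AttributeSource> states = new ArrayList<AttributeSource>();
    stream.reset();
    while (stream.incrementToken()) {
      states.add(stream.cloneAttributes());
    }
    stream.end();
    stream.close();
    return states;
  }
}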
Example #5
  public static void assertAnalyzesToReuse(
      Analyzer a,
      String input,
      String[] output,
      int[] startOffsets,
      int[] endOffsets,
      String[] types,
      int[] posIncrements)
      throws IOException {
    assertTokenStreamContents(
        a.reusableTokenStream("dummy", new StringReader(input)),
        output,
        startOffsets,
        endOffsets,
        types,
        posIncrements,
        Integer.valueOf(input.length()));
  }
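A hedged usage sketch, assuming Lucene 3.x and a test class that can see assertAnalyzesToReuse (for example by extending the base test class it belongs to). The expected offsets, types, and position increments below are what WhitespaceAnalyzer produces for "foo bar":

import java.io.IOException;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.util.Version;
import org.junit.Test;

public class ReuseAssertionDemo {
  @Test
  public void whitespaceAnalyzerTokenizesAndIsReusable() throws IOException {
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_36);
    assertAnalyzesToReuse(
        analyzer,
        "foo bar",
        new String[] {"foo", "bar"},   // tokens
        new int[] {0, 4},              // start offsets
        new int[] {3, 7},              // end offsets
        new String[] {"word", "word"}, // token types
        new int[] {1, 1});             // position increments
  }
}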