Example #1
  public void testLUCENE_3042() throws Exception {
    String testString = "t";

    Analyzer analyzer = new MockAnalyzer(random());
    try (TokenStream stream = analyzer.tokenStream("dummy", testString)) {
      stream.reset();
      while (stream.incrementToken()) {
        // consume
      }
      stream.end();
    }

    assertAnalyzesTo(analyzer, testString, new String[] {"t"});
  }
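This test exercises the standard TokenStream consumer contract: reset() before the first incrementToken(), end() after the last one, and close() via try-with-resources. As a minimal sketch of the same contract that also reads the term text of each token (collectTerms is a hypothetical helper name, not part of the example above):

  import java.io.IOException;
  import java.util.ArrayList;
  import java.util.List;
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

  static List<String> collectTerms(Analyzer analyzer, String field, String text) throws IOException {
    List<String> terms = new ArrayList<>();
    try (TokenStream stream = analyzer.tokenStream(field, text)) {
      CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
      stream.reset();                  // required before the first incrementToken()
      while (stream.incrementToken()) {
        terms.add(termAtt.toString()); // copy the text; the attribute is reused per token
      }
      stream.end();                    // records the final offset state
    }                                  // close() releases the underlying reader
    return terms;
  }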
Example #2
  /** Test a configuration where three characters make a term */
  public void testThreeChars() throws Exception {
    CharacterRunAutomaton single = new CharacterRunAutomaton(new RegExp("...").toAutomaton());
    Analyzer a = new MockAnalyzer(random(), single, false);
    assertAnalyzesTo(a, "foobar", new String[] {"foo", "bar"}, new int[] {0, 3}, new int[] {3, 6});
    // make sure that when the last term is a "partial" match, end() is correct
    assertTokenStreamContents(
        a.tokenStream("bogus", "fooba"),
        new String[] {"foo"},
        new int[] {0},
        new int[] {3},
        new int[] {1},
        5);
    checkRandomData(random(), a, 100);
  }
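Here the RegExp "..." accepts exactly three characters, so MockTokenizer emits maximal three-character terms; the trailing partial match "ba" in "fooba" is dropped, but end() still reports the final offset 5. A minimal sketch of the same idea with a different automaton, assuming the same BaseTokenStreamTestCase utilities: terms are maximal runs of non-space characters, lowercased.

    CharacterRunAutomaton nonSpace = new CharacterRunAutomaton(new RegExp("[^ ]+").toAutomaton());
    Analyzer ws = new MockAnalyzer(random(), nonSpace, true); // true => lowercase terms
    assertAnalyzesTo(ws, "Foo Bar", new String[] {"foo", "bar"}, new int[] {0, 4}, new int[] {3, 7});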
Example #3
  /** Test MockTokenizer encountering a too-long token */
  public void testTooLongToken() throws Exception {
    Analyzer whitespace =
        new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false, 5);
            return new TokenStreamComponents(t, t);
          }
        };

    assertTokenStreamContents(
        whitespace.tokenStream("bogus", "test 123 toolong ok "),
        new String[] {"test", "123", "toolo", "ng", "ok"},
        new int[] {0, 5, 9, 14, 17},
        new int[] {4, 8, 14, 16, 19},
        20);

    assertTokenStreamContents(
        whitespace.tokenStream("bogus", "test 123 toolo"),
        new String[] {"test", "123", "toolo"},
        new int[] {0, 5, 9},
        new int[] {4, 8, 14},
        14);
  }
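The third constructor argument (5 above) is MockTokenizer's maximum token length: a run longer than the cap is split into consecutive terms ("toolong" becomes "toolo" + "ng") rather than discarded. A minimal standalone sketch of that behavior, assuming the same constructor and a StringReader input:

    Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false, 3);
    t.setReader(new java.io.StringReader("abcdef gh"));
    CharTermAttribute term = t.addAttribute(CharTermAttribute.class);
    t.reset();
    while (t.incrementToken()) {
      System.out.println(term.toString()); // prints "abc", "def", "gh"
    }
    t.end();
    t.close();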
Example #4
  protected List<BytesRef> analyze(String text, String field, Analyzer analyzer)
      throws IOException {
    List<BytesRef> bytesRefs = new ArrayList<>();

    try (TokenStream tokenStream = analyzer.tokenStream(field, text)) {
      TermToBytesRefAttribute termAttribute =
          tokenStream.getAttribute(TermToBytesRefAttribute.class);

      tokenStream.reset();

      while (tokenStream.incrementToken()) {
        // getBytesRef() returns a buffer that the attribute reuses for every token,
        // so deep-copy it before storing (fillBytesRef() was removed in Lucene 5).
        bytesRefs.add(BytesRef.deepCopyOf(termAttribute.getBytesRef()));
      }
      }

      tokenStream.end();
    }

    return bytesRefs;
  }
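A hypothetical call site for the helper above ("body" is an illustrative field name, not from the source): each returned BytesRef holds the indexed byte form of one term and can be decoded with utf8ToString().

    Analyzer analyzer = new MockAnalyzer(random());
    List<BytesRef> terms = analyze("some test text", "body", analyzer);
    for (BytesRef ref : terms) {
      System.out.println(ref.utf8ToString()); // decode the term bytes as UTF-8
    }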