Exemplo n.º 1
0
 /**
  * Advances this stream by one token.
  *
  * <p>An incoming term equal to {@code "phrase"} is emitted as two consecutive tokens,
  * {@code "phrase1"} followed by {@code "phrase2"}, both carrying the offsets of the original
  * term. Incoming terms equal to {@code "stop"} are skipped. Every other term is returned as
  * delivered by the wrapped input.
  *
  * @return {@code true} if a token was produced, {@code false} once the wrapped input has no
  *     more tokens
  * @throws IOException propagated from the wrapped input
  */
 public boolean incrementToken() throws IOException {
   // The second half of a split "phrase" token is still owed from the previous call.
   if (inPhrase) {
     inPhrase = false;
     clearAttributes();
     termAtt.setTermBuffer("phrase2");
     offsetAtt.setOffset(savedStart, savedEnd);
     return true;
   }

   while (input.incrementToken()) {
     final String term = termAtt.term();

     if (term.equals("phrase")) {
       // Remember the offsets so the follow-up "phrase2" token can reuse them.
       inPhrase = true;
       savedStart = offsetAtt.startOffset();
       savedEnd = offsetAtt.endOffset();
       termAtt.setTermBuffer("phrase1");
       offsetAtt.setOffset(savedStart, savedEnd);
       return true;
     }

     if (!term.equals("stop")) {
       return true;
     }
     // "stop" terms fall through and the loop pulls the next input token.
   }

   return false;
 }
  /**
   * Advances the wrapped input by one token, replaces the term text with the value returned by
   * {@code metaphoner.encode} for that text, and sets the token type to {@code METAPHONE}.
   *
   * @return {@code true} if a token was produced, {@code false} once the wrapped input has no
   *     more tokens
   * @throws IOException propagated from the wrapped input
   */
  public boolean incrementToken() throws IOException {
    final boolean hasToken = input.incrementToken();
    if (!hasToken) {
      // End of the wrapped stream has been reached.
      return false;
    }

    // Overwrite the term text with its encoded form, then tag the token type.
    termAttr.setTermBuffer(metaphoner.encode(termAttr.term()));
    typeAttr.setType(METAPHONE);
    return true;
  }
  /**
   * Advances the wrapped input by one token and hands each word of the term buffer to
   * {@code factory.processWord}.
   *
   * <p>Words are separated by any character {@code <= ' '} or by {@code '.'}. Tokens whose length
   * is not below {@code factory.maxTokenLength} are passed through without processing. If more
   * words are found than {@code factory.maxWordCount}, the term is restored from the copy taken
   * before processing started.
   *
   * @return {@code true} if a token was produced, {@code false} once the wrapped input has no
   *     more tokens
   * @throws IOException propagated from the wrapped input
   */
  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }

    final char[] buffer = termAtt.termBuffer();
    final int length = termAtt.termLength();

    // A snapshot is only taken when a word limit below the default is configured;
    // it is used to undo the processing if that limit turns out to be exceeded.
    char[] snapshot = null;
    if (factory.maxWordCount < CapitalizationFilterFactory.DEFAULT_MAX_WORD_COUNT) {
      snapshot = new char[length];
      System.arraycopy(buffer, 0, snapshot, 0, length);
    }

    // Tokens at or above the length limit are left as they are.
    if (length >= factory.maxTokenLength) {
      return true;
    }

    int words = 0;
    int wordStart = 0;
    int pos = 0;
    while (pos < length) {
      final char ch = buffer[pos];
      final boolean isSeparator = ch <= ' ' || ch == '.';
      if (isSeparator && pos > wordStart) {
        factory.processWord(buffer, wordStart, pos - wordStart, words++);
        wordStart = pos + 1;
        // Step over the separator and the character that follows it.
        pos += 2;
      } else {
        pos++;
      }
    }

    // Whatever follows the last separator is the final word.
    if (wordStart < length) {
      factory.processWord(buffer, wordStart, length - wordStart, words++);
    }

    if (words > factory.maxWordCount) {
      termAtt.setTermBuffer(snapshot, 0, length);
    }

    return true;
  }
 /**
  * Copies the term text, position increment and offsets of {@code token} into this stream's
  * term, position-increment and offset attributes.
  *
  * @param token the token whose values are copied
  */
 private void applyToken(Token token) {
   final char[] text = token.termBuffer();
   final int textLength = token.termLength();
   termAtt.setTermBuffer(text, 0, textLength);

   final int increment = token.getPositionIncrement();
   posAtt.setPositionIncrement(increment);

   final int start = token.startOffset();
   final int end = token.endOffset();
   offsetAtt.setOffset(start, end);
 }
  /**
   * Consumes {@code ts} and asserts that it yields exactly the expected tokens.
   *
   * <p>Before each {@code incrementToken()} call the attributes are cleared and filled with bogus
   * values, and the method asserts that {@code clearAttributes()} was invoked during that call.
   * After the expected tokens, the stream must report its end; it is then ended and closed.
   *
   * @param ts the stream under test; it is reset, fully consumed, ended and closed
   * @param output expected term texts, one per token; must not be {@code null}
   * @param startOffsets expected start offsets per token, or {@code null} to skip the check
   * @param endOffsets expected end offsets per token, or {@code null} to skip the check
   * @param types expected token types per token, or {@code null} to skip the check
   * @param posIncrements expected position increments per token, or {@code null} to skip the
   *     check
   * @param finalOffset expected end offset after {@code ts.end()}, or {@code null} to skip the
   *     check
   * @throws IOException propagated from the stream
   */
  public static void assertTokenStreamContents(
      TokenStream ts,
      String[] output,
      int startOffsets[],
      int endOffsets[],
      String types[],
      int posIncrements[],
      Integer finalOffset)
      throws IOException {
    assertNotNull(output);
    final CheckClearAttributesAttribute clearTracker =
        (CheckClearAttributesAttribute) ts.addAttribute(CheckClearAttributesAttribute.class);

    assertTrue("has no TermAttribute", ts.hasAttribute(TermAttribute.class));
    final TermAttribute term = (TermAttribute) ts.getAttribute(TermAttribute.class);

    // Each optional attribute is only required when a matching expectation was supplied.
    final boolean offsetsRequested =
        startOffsets != null || endOffsets != null || finalOffset != null;
    OffsetAttribute offsets = null;
    if (offsetsRequested) {
      assertTrue("has no OffsetAttribute", ts.hasAttribute(OffsetAttribute.class));
      offsets = (OffsetAttribute) ts.getAttribute(OffsetAttribute.class);
    }

    TypeAttribute type = null;
    if (types != null) {
      assertTrue("has no TypeAttribute", ts.hasAttribute(TypeAttribute.class));
      type = (TypeAttribute) ts.getAttribute(TypeAttribute.class);
    }

    PositionIncrementAttribute posIncr = null;
    if (posIncrements != null) {
      assertTrue(
          "has no PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class));
      posIncr = (PositionIncrementAttribute) ts.getAttribute(PositionIncrementAttribute.class);
    }

    ts.reset();
    for (int i = 0; i < output.length; i++) {
      // Wipe the state and plant bogus values so that anything carried over from the
      // previous token shows up as a mismatch below.
      ts.clearAttributes();
      term.setTermBuffer("bogusTerm");
      if (offsets != null) {
        offsets.setOffset(14584724, 24683243);
      }
      if (type != null) {
        type.setType("bogusType");
      }
      if (posIncr != null) {
        posIncr.setPositionIncrement(45987657);
      }

      // Discard the flag raised by our own clearAttributes() call above.
      clearTracker.getAndResetClearCalled();
      assertTrue("token " + i + " does not exist", ts.incrementToken());
      assertTrue(
          "clearAttributes() was not called correctly in TokenStream chain",
          clearTracker.getAndResetClearCalled());

      assertEquals("term " + i, output[i], term.term());
      if (startOffsets != null) {
        assertEquals("startOffset " + i, startOffsets[i], offsets.startOffset());
      }
      if (endOffsets != null) {
        assertEquals("endOffset " + i, endOffsets[i], offsets.endOffset());
      }
      if (types != null) {
        assertEquals("type " + i, types[i], type.type());
      }
      if (posIncrements != null) {
        assertEquals("posIncrement " + i, posIncrements[i], posIncr.getPositionIncrement());
      }
    }

    assertFalse("end of stream", ts.incrementToken());
    ts.end();
    if (finalOffset != null) {
      assertEquals("finalOffset ", finalOffset.intValue(), offsets.endOffset());
    }
    ts.close();
  }