예제 #1
0
  /**
   * Creates a token over a slice of {@code oriToken}'s buffer and offers it to {@code tokenQueue}.
   *
   * <p>The new token's start and end offsets are {@code oriToken}'s start offset shifted by the
   * slice position. Its type is {@code Word.TYPE_DIGIT} when {@code type} equals
   * {@link Character#DECIMAL_DIGIT_NUMBER}, and {@code Word.TYPE_LETTER} otherwise.
   *
   * @param oriToken token whose buffer and start offset the slice is taken from
   * @param termBufferOffset position of the slice within {@code oriToken}'s buffer
   * @param termBufferLength length of the slice
   * @param type character category used to choose the token type
   */
  private void addToken(Token oriToken, int termBufferOffset, int termBufferLength, byte type) {
    final int sliceStart = oriToken.startOffset() + termBufferOffset;
    final int sliceEnd = sliceStart + termBufferLength;

    Token slice =
        new Token(oriToken.buffer(), termBufferOffset, termBufferLength, sliceStart, sliceEnd);
    slice.setType(type == Character.DECIMAL_DIGIT_NUMBER ? Word.TYPE_DIGIT : Word.TYPE_LETTER);

    tokenQueue.offer(slice);
  }
 /**
  * Produces the canonical-form alias of a token, for languages that have a canonical form.
  *
  * <p>Only the language code {@code "sr"} is handled: the term text is run through
  * {@code SerbianFilter.convert} and, if the result differs from the original text, returned as a
  * new token covering the same offsets.
  *
  * @param t token whose term text should be canonicalized
  * @return a token of type {@code "alias"} with position increment 0 that carries the converted
  *     text and the offsets of {@code t}; {@code null} if canonical filtering is disabled, the
  *     language is not handled, or the converted text equals the original term text
  */
 public Token canonizeToken(Token t) {
   if (!hasCanonicalFilter) {
     return null;
   }
   if (lang.equals("sr")) {
     String original = t.termText();
     String nt = new SerbianFilter(null).convert(original);
     // Compare term text with term text. The earlier check compared the Token object itself
     // against a String, which can never be equal, so an alias was emitted even for text that
     // the conversion left unchanged.
     if (!original.equals(nt)) {
       Token tt = new Token(nt, t.startOffset(), t.endOffset());
       tt.setPositionIncrement(0); // alias occupies the same position as the original token
       tt.setType("alias");
       return tt;
     }
   }
   return null;
 }
예제 #3
0
파일: TokenUtils.java 프로젝트: khaliyo/pet
  /**
   * Advances {@code input} by one token and copies that token's term text, offsets and type into
   * a {@link Token}.
   *
   * @param input stream to read from; may be {@code null}
   * @param reusableToken token to clear and fill; if {@code null}, a new {@link Token} is created
   * @return the filled token, or {@code null} if {@code input} is {@code null} or
   *     {@code input.incrementToken()} reports that there is no further token
   * @throws IOException propagated from {@code input.incrementToken()}
   */
  public static Token nextToken(TokenStream input, Token reusableToken) throws IOException {
    if (input == null || !input.incrementToken()) {
      return null;
    }

    // Look the attributes up before touching the target token, so a failed lookup leaves a
    // caller-supplied token unmodified.
    CharTermAttribute term = input.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsets = input.getAttribute(OffsetAttribute.class);
    TypeAttribute kind = input.getAttribute(TypeAttribute.class);

    Token result = (reusableToken != null) ? reusableToken : new Token();
    result.clear();

    if (term != null) {
      // copyBuffer is the Lucene 3.1+ form of the 3.0 setTermBuffer call.
      result.copyBuffer(term.buffer(), 0, term.length());
    }
    if (offsets != null) {
      // setOffset is the Lucene 4.0 form of the separate setStartOffset/setEndOffset calls.
      result.setOffset(offsets.startOffset(), offsets.endOffset());
    }
    if (kind != null) {
      result.setType(kind.type());
    }

    return result;
  }