/**
 * Builds a sub-token over a slice of {@code oriToken}'s term buffer and appends it to
 * {@code tokenQueue}.
 *
 * @param oriToken         token whose buffer and start offset the new token is derived from
 * @param termBufferOffset offset of the slice inside {@code oriToken}'s buffer
 * @param termBufferLength length of the slice
 * @param type             character category; {@link Character#DECIMAL_DIGIT_NUMBER} yields
 *                         {@code Word.TYPE_DIGIT}, anything else {@code Word.TYPE_LETTER}
 */
private void addToken(Token oriToken, int termBufferOffset, int termBufferLength, byte type) {
    // Offsets of the slice are expressed relative to the original token's start offset.
    final int sliceStart = oriToken.startOffset() + termBufferOffset;
    final int sliceEnd = sliceStart + termBufferLength;

    Token slice = new Token(oriToken.buffer(), termBufferOffset, termBufferLength, sliceStart, sliceEnd);
    slice.setType(type == Character.DECIMAL_DIGIT_NUMBER ? Word.TYPE_DIGIT : Word.TYPE_LETTER);

    tokenQueue.offer(slice);
}
/**
 * Produces the canonical form of a token for languages that have one.
 *
 * <p>Only the language code {@code "sr"} is handled here: the term text is run through
 * {@code SerbianFilter.convert}. When the converted text differs from the original term
 * text, a new token with the converted text, the same start/end offsets, a position
 * increment of 0 and the type {@code "alias"} is returned.
 *
 * @param t the token to canonize
 * @return the canonical alias token, or {@code null} if no canonical filter is enabled,
 *         the language is not handled, or the converted text equals the original text
 */
public Token canonizeToken(Token t) {
    if (!hasCanonicalFilter) {
        return null;
    }
    // Constant-first comparison so a null language code does not throw.
    if ("sr".equals(lang)) {
        String original = t.termText();
        String converted = new SerbianFilter(null).convert(original);
        // Compare term text with term text. The previous check compared the Token object
        // itself with a String, which is never equal, so an alias was emitted every time.
        if (converted != null && !converted.equals(original)) {
            Token alias = new Token(converted, t.startOffset(), t.endOffset());
            alias.setPositionIncrement(0);
            alias.setType("alias");
            return alias;
        }
    }
    return null;
}
/** * @param input * @param reusableToken is null well new one auto. * @return null - if not next token or input is null. * @throws IOException */ public static Token nextToken(TokenStream input, Token reusableToken) throws IOException { if (input == null) { return null; } if (!input.incrementToken()) { return null; } CharTermAttribute termAtt = (CharTermAttribute) input.getAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = (OffsetAttribute) input.getAttribute(OffsetAttribute.class); TypeAttribute typeAtt = (TypeAttribute) input.getAttribute(TypeAttribute.class); if (reusableToken == null) { reusableToken = new Token(); } reusableToken.clear(); if (termAtt != null) { // lucene 3.0 // reusableToken.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength()); // lucene 3.1 reusableToken.copyBuffer(termAtt.buffer(), 0, termAtt.length()); } if (offsetAtt != null) { // lucene 3.1 // reusableToken.setStartOffset(offsetAtt.startOffset()); // reusableToken.setEndOffset(offsetAtt.endOffset()); // lucene 4.0 reusableToken.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); } if (typeAtt != null) { reusableToken.setType(typeAtt.type()); } return reusableToken; }