public String tokens(String field) {
   try {
     Field f = doc.getField(field);
     if (f == null) fail("No such field " + field);
     if (!f.isTokenized()) {
       String val = value(field);
       Token t = new Token(val, 0, val.length());
       return t.getPositionIncrement() + " [" + t.termText() + "]";
     }
     TokenStream ts = f.tokenStreamValue();
     if (ts == null && f.stringValue() != null) ts = analyzer.tokenStream(field, f.stringValue());
     if (ts == null && f.readerValue() != null) ts = analyzer.tokenStream(field, f.readerValue());
     if (ts == null) fail("No token stream for field " + field);
     Token t = null;
     StringBuilder sb = new StringBuilder();
     while ((t = ts.next()) != null) {
       sb.append(t.getPositionIncrement() + " [" + t.termText() + "] ");
     }
     return sb.toString().trim();
   } catch (Exception e) {
     e.printStackTrace();
     fail(e.getMessage());
     return null;
   }
 }
Esempio n. 2
0
  /**
   * Merge two lists of tokens, producing a single list with manipulated positionIncrements so that
   * the tokens end up at the same position.
   *
   * <p>Example: [a b] merged with [c d] produces [a/b c/d] ('/' denotes tokens in the same
   * position) Example: [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2] (a,n means a
   * has posInc=n)
   */
  public static List<Token> mergeTokens(List<Token> lst1, List<Token> lst2) {
    ArrayList<Token> result = new ArrayList<Token>();
    if (lst1 == null || lst2 == null) {
      if (lst2 != null) result.addAll(lst2);
      if (lst1 != null) result.addAll(lst1);
      return result;
    }

    int pos = 0;
    Iterator<Token> iter1 = lst1.iterator();
    Iterator<Token> iter2 = lst2.iterator();
    Token tok1 = iter1.hasNext() ? iter1.next() : null;
    Token tok2 = iter2.hasNext() ? iter2.next() : null;
    int pos1 = tok1 != null ? tok1.getPositionIncrement() : 0;
    int pos2 = tok2 != null ? tok2.getPositionIncrement() : 0;
    while (tok1 != null || tok2 != null) {
      while (tok1 != null && (pos1 <= pos2 || tok2 == null)) {
        Token tok = new Token(tok1.startOffset(), tok1.endOffset(), tok1.type());
        tok.copyBuffer(tok1.buffer(), 0, tok1.length());
        tok.setPositionIncrement(pos1 - pos);
        result.add(tok);
        pos = pos1;
        tok1 = iter1.hasNext() ? iter1.next() : null;
        pos1 += tok1 != null ? tok1.getPositionIncrement() : 0;
      }
      while (tok2 != null && (pos2 <= pos1 || tok1 == null)) {
        Token tok = new Token(tok2.startOffset(), tok2.endOffset(), tok2.type());
        tok.copyBuffer(tok2.buffer(), 0, tok2.length());
        tok.setPositionIncrement(pos2 - pos);
        result.add(tok);
        pos = pos2;
        tok2 = iter2.hasNext() ? iter2.next() : null;
        pos2 += tok2 != null ? tok2.getPositionIncrement() : 0;
      }
    }
    return result;
  }
Esempio n. 3
0
 @Override
 public boolean incrementToken() throws IOException {
   if (index >= tokens.length) return false;
   else {
     clearAttributes();
     Token token = tokens[index++];
     termAtt.setEmpty().append(token);
     offsetAtt.setOffset(token.startOffset(), token.endOffset());
     posIncAtt.setPositionIncrement(token.getPositionIncrement());
     flagsAtt.setFlags(token.getFlags());
     typeAtt.setType(token.type());
     payloadAtt.setPayload(token.getPayload());
     return true;
   }
 }
Esempio n. 4
0
 @Override
 public boolean incrementToken() {
   if (upto < tokens.length) {
     final Token token = tokens[upto++];
     // TODO: can we just capture/restoreState so
     // we get all attrs...?
     clearAttributes();
     termAtt.setEmpty();
     termAtt.append(token.toString());
     posIncrAtt.setPositionIncrement(token.getPositionIncrement());
     posLengthAtt.setPositionLength(token.getPositionLength());
     offsetAtt.setOffset(token.startOffset(), token.endOffset());
     payloadAtt.setPayload(token.getPayload());
     return true;
   } else {
     return false;
   }
 }
 @Override
 public boolean incrementToken() throws IOException {
   if (tokens == null) {
     fillTokens();
   }
   // System.out.println("graphTokenizer: incr upto=" + upto + " vs " + tokens.size());
   if (upto == tokens.size()) {
     // System.out.println("  END @ " + tokens.size());
     return false;
   }
   final Token t = tokens.get(upto++);
   // System.out.println("  return token=" + t);
   clearAttributes();
   termAtt.append(t.toString());
   offsetAtt.setOffset(t.startOffset(), t.endOffset());
   posIncrAtt.setPositionIncrement(t.getPositionIncrement());
   posLengthAtt.setPositionLength(t.getPositionLength());
   return true;
 }
 private void applyToken(Token token) {
   termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
   posAtt.setPositionIncrement(token.getPositionIncrement());
   offsetAtt.setOffset(token.startOffset(), token.endOffset());
 }