Java Token.length Examples

Programming Language: Java

Namespace/Package Name: org.apache.lucene.analysis

Class/Type: Token

Method/Function: length

Examples at hotexamples.com: 4

Java Token.length - 4 examples found. These are the top rated real world Java examples of org.apache.lucene.analysis.Token.length extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

startOffset(12)

termText(11)

endOffset(10)

type(7)

term(7)

setPositionIncrement(7)

getPositionIncrement(6)

buffer(5)

length(4)

setType(3)

setPositionLength(3)

getPayload(2)

toString(2)

copyBuffer(2)

termLength(2)

termBuffer(2)

getPositionLength(2)

copyToWithoutPayloadClone(1)

setTermBuffer(1)

equals(1)

setStartOffset(1)

getFlags(1)

clear(1)

setEndOffset(1)

payload(1)

setOffset(1)

Example #1

Show file

File: CutLetterDigitFilter.java Project: khaliyo/pet

  /**
   * Returns the next token to emit, splitting mixed letter/digit tokens into runs of one kind.
   *
   * <p>Tokens left in {@code tokenQueue} by an earlier split are served first. Otherwise the next
   * token is pulled from {@code input} via {@code TokenUtils.nextToken}. If its type is
   * {@code Word.TYPE_LETTER_OR_DIGIT} or {@code Word.TYPE_DIGIT_OR_LETTER} (case-insensitive), its
   * characters are grouped into maximal runs of the same character category and each run is handed
   * to {@code addToken}; the head of {@code tokenQueue} is then returned in place of the original
   * token.
   *
   * @param reusableToken token instance passed through to {@code TokenUtils.nextToken}; must not
   *     be {@code null}
   * @return the next token, or {@code null} when neither the queue nor the input yields one
   * @throws IOException if reading from the underlying input fails
   */
  private Token nextToken(Token reusableToken) throws IOException {
    assert reusableToken != null;

    // Serve what an earlier split left behind first.
    Token nextToken = tokenQueue.poll();
    if (nextToken != null) {
      return nextToken;
    }

    // Per the original author's note, TokenUtils.nextToken already advances the input stream,
    // so input.incrementToken() is not called here.
    nextToken = TokenUtils.nextToken(input, reusableToken);

    if (nextToken != null
        && (Word.TYPE_LETTER_OR_DIGIT.equalsIgnoreCase(nextToken.type())
            || Word.TYPE_DIGIT_OR_LETTER.equalsIgnoreCase(nextToken.type()))) {
      final char[] buffer = nextToken.buffer();
      final int length = nextToken.length();
      // Category of the run being accumulated. It is seeded from the first character inside the
      // loop, after normalization, so a leading upper-case/title-case letter is not mistaken for
      // a category change (which previously flushed a zero-length segment to addToken).
      byte lastType = Character.LOWERCASE_LETTER;
      int termBufferOffset = 0;
      int termBufferLength = 0;
      for (int i = 0; i < length; i++) {
        byte type = (byte) Character.getType(buffer[i]);
        // Collapse every category up to MODIFIER_LETTER into one "letter" category.
        if (type <= Character.MODIFIER_LETTER) {
          type = Character.LOWERCASE_LETTER;
        }
        if (i > 0 && type != lastType) { // category changed: flush the finished run
          addToken(nextToken, termBufferOffset, termBufferLength, lastType);

          termBufferOffset += termBufferLength;
          termBufferLength = 0;
        }
        lastType = type;

        termBufferLength++;
      }
      if (termBufferLength > 0) { // flush the trailing run
        addToken(nextToken, termBufferOffset, termBufferLength, lastType);
      }
      // NOTE(review): presumably addToken enqueues into tokenQueue; if nothing was enqueued
      // (e.g. a zero-length token) this returns null, which callers treat as end of input.
      nextToken = tokenQueue.poll();
    }

    return nextToken;
  }

Example #2

Show file

File: CutLetterDigitFilter.java Project: khaliyo/pet

 /**
  * Advances to the next token and publishes it through the term, offset and type attributes.
  *
  * <p>Attributes are cleared first. When {@code nextToken} has nothing more to return,
  * {@code end()} is invoked and {@code false} is returned.
  *
  * @return {@code true} if a token was copied into the attributes, {@code false} otherwise
  * @throws IOException if fetching the next token fails
  */
 public boolean incrementToken() throws IOException {
   clearAttributes();

   final Token current = nextToken(reusableToken);
   if (current == null) {
     // No more tokens: finish up and signal exhaustion.
     end();
     return false;
   }

   termAtt.copyBuffer(current.buffer(), 0, current.length());
   offsetAtt.setOffset(current.startOffset(), current.endOffset());
   typeAtt.setType(current.type());
   return true;
 }

Example #3

Show file

File: SlowSynonymMap.java Project: sudarshang/lucene-solr

  /**
   * Interleaves two token lists into a single list ordered by absolute position, rewriting each
   * token's position increment relative to the previously emitted token so that tokens from both
   * lists that share an absolute position end up stacked at that position.
   *
   * <p>Absolute positions are the running sums of each list's position increments. On a tie the
   * token from {@code lst1} is emitted first. Every merged token is a fresh {@code Token} carrying
   * the source token's offsets, type and term text.
   *
   * <p>Example (assuming every token has a position increment of 1): merging [a b] with [c d]
   * places a and c at the first position, and d and b at the second.
   *
   * <p>If either argument is {@code null}, the elements of the other list (if any) are added to
   * the result as-is, without copying.
   *
   * @param lst1 first token list, may be {@code null}
   * @param lst2 second token list, may be {@code null}
   * @return a new list holding the merged tokens; never {@code null}
   */
  public static List<Token> mergeTokens(List<Token> lst1, List<Token> lst2) {
    ArrayList<Token> merged = new ArrayList<Token>();
    if (lst1 == null || lst2 == null) {
      // At most one side is present; pass its tokens through untouched.
      List<Token> survivor = (lst2 != null) ? lst2 : lst1;
      if (survivor != null) {
        merged.addAll(survivor);
      }
      return merged;
    }

    Iterator<Token> left = lst1.iterator();
    Iterator<Token> right = lst2.iterator();
    Token leftTok = left.hasNext() ? left.next() : null;
    Token rightTok = right.hasNext() ? right.next() : null;

    // Absolute positions of the pending token on each side.
    int leftPos = 0;
    if (leftTok != null) {
      leftPos = leftTok.getPositionIncrement();
    }
    int rightPos = 0;
    if (rightTok != null) {
      rightPos = rightTok.getPositionIncrement();
    }

    // Absolute position of the most recently emitted token.
    int emittedPos = 0;
    while (leftTok != null || rightTok != null) {
      // Drain the left side while it is not ahead of the right (or the right is exhausted).
      while (leftTok != null && (leftPos <= rightPos || rightTok == null)) {
        merged.add(copyWithIncrement(leftTok, leftPos - emittedPos));
        emittedPos = leftPos;
        leftTok = left.hasNext() ? left.next() : null;
        if (leftTok != null) {
          leftPos += leftTok.getPositionIncrement();
        }
      }
      // Then drain the right side under the mirrored condition.
      while (rightTok != null && (rightPos <= leftPos || leftTok == null)) {
        merged.add(copyWithIncrement(rightTok, rightPos - emittedPos));
        emittedPos = rightPos;
        rightTok = right.hasNext() ? right.next() : null;
        if (rightTok != null) {
          rightPos += rightTok.getPositionIncrement();
        }
      }
    }
    return merged;
  }

  /** Creates a copy of {@code source} (offsets, type, term text) with the given increment. */
  private static Token copyWithIncrement(Token source, int positionIncrement) {
    Token copy = new Token(source.startOffset(), source.endOffset(), source.type());
    copy.copyBuffer(source.buffer(), 0, source.length());
    copy.setPositionIncrement(positionIncrement);
    return copy;
  }

Example #4

Show file

File: DiceSuggester.java Project: hardikgw/SolrPlugins

  /**
   * Produces spelling suggestions for every token in {@code options.tokens}.
   *
   * <p>For each token, up to {@code options.count * 10} candidates are requested from
   * {@code lookup}. When a {@code suggestionAnalyzer} is configured, each candidate is mapped
   * through {@code getAnalyzerResult}; candidates that map to the same correction have their
   * weights summed, and corrections rejected by {@code isStringNullOrEmpty} are skipped.
   * Collection stops once {@code options.count} distinct suggestions have been gathered.
   *
   * <p>In {@code SUGGEST_MORE_POPULAR} mode the suggestions are added in descending order of
   * accumulated weight, with ties broken by the suggestion text so that equally weighted
   * suggestions are all kept. In other modes the candidates are sorted with
   * {@code Collections.sort} and added in that order.
   *
   * @param options spelling request: tokens, suggest mode and desired suggestion count
   * @return the collected suggestions, or {@code EMPTY_RESULT} if {@code lookup} is {@code null}
   * @throws IOException declared by the overridden method
   */
  @Override
  public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
    LOG.debug("getSuggestions: " + options.tokens);
    if (lookup == null) {
      LOG.info("Lookup is null - invoke spellchecker.build first");
      return EMPTY_RESULT;
    }
    SpellingResult res = new SpellingResult();
    CharsRef scratch = new CharsRef();

    // Neither flag depends on the current token, so compute them once.
    final boolean morePopularMode = options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR;
    final boolean onlyMorePopular =
        morePopularMode
            && !(lookup instanceof WFSTCompletionLookup)
            && !(lookup instanceof AnalyzingSuggester);

    for (Token currentToken : options.tokens) {
      scratch.chars = currentToken.buffer();
      scratch.offset = 0;
      scratch.length = currentToken.length();

      // get more than the requested suggestions as a lot get collapsed by the corrections
      List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count * 10);
      if (suggestions == null || suggestions.isEmpty()) {
        continue;
      }

      if (!morePopularMode) {
        Collections.sort(suggestions);
      }

      final LinkedHashMap<String, Integer> lhm = new LinkedHashMap<String, Integer>();
      for (LookupResult lr : suggestions) {
        String suggestion = lr.key.toString();
        if (this.suggestionAnalyzer != null) {
          String correction = getAnalyzerResult(suggestion);
          // multiple could map to the same, so don't repeat suggestions
          if (!isStringNullOrEmpty(correction)) {
            Integer previous = lhm.get(correction);
            lhm.put(correction, previous == null ? (int) lr.value : previous + (int) lr.value);
          }
        } else {
          lhm.put(suggestion, (int) lr.value);
        }

        if (lhm.size() >= options.count) {
          break;
        }
      }

      // sort by new doc frequency
      Map<String, Integer> orderedMap;
      if (!morePopularMode) {
        // retain the sort order from above
        orderedMap = lhm;
      } else {
        orderedMap =
            new TreeMap<String, Integer>(
                new Comparator<String>() {
                  @Override
                  public int compare(String s1, String s2) {
                    int byWeight = lhm.get(s2).compareTo(lhm.get(s1));
                    // A TreeMap treats keys that compare as 0 as the same key, so comparing by
                    // weight alone would drop all but one of the equally weighted suggestions.
                    return byWeight != 0 ? byWeight : s1.compareTo(s2);
                  }
                });
        orderedMap.putAll(lhm);
      }

      for (Map.Entry<String, Integer> entry : orderedMap.entrySet()) {
        res.add(currentToken, entry.getKey(), entry.getValue());
      }
    }
    return res;
  }