public KeywordTokenizer(AttributeFactory factory, Reader input, int bufferSize) {
   super(factory, input);
   if (bufferSize > 0) {
     // Pre-size the term buffer so the single whole-input token avoids repeated growth.
     termAtt.resizeBuffer(bufferSize);
   } else {
     throw new IllegalArgumentException("bufferSize must be > 0");
   }
 }
 /**
  * Creates a tokenizer that converts Chinese input to pinyin.
  *
  * @param input the reader to tokenize
  * @param bufferSize initial size of the term buffer; must be positive
  * @throws IllegalArgumentException if {@code bufferSize} is not positive
  */
 public PinyinTokenizer(Reader input, int bufferSize) {
   super(input);
   // Same guard as KeywordTokenizer: a non-positive size is a caller error and
   // should fail loudly rather than be passed through to resizeBuffer.
   if (bufferSize <= 0) {
     throw new IllegalArgumentException("bufferSize must be > 0");
   }
   termAtt.resizeBuffer(bufferSize);
   // Emit lowercase pinyin, without tone marks, rendering 'ü' as 'v'.
   format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
   format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
   format.setVCharType(HanyuPinyinVCharType.WITH_V);
 }
Example #3
 /**
  * Creates an n-gram tokenizer with a sliding window of {@code ngramSize} characters.
  *
  * @param ngramSize width of the n-gram window; must be at least 1
  * @throws IllegalArgumentException if {@code ngramSize} is less than 1
  */
 public NGTokenizer(int ngramSize) {
   super();
   if (ngramSize < 1) {
     throw new IllegalArgumentException("ngramSize < 1");
   }
   this.ngramSize = ngramSize;
   termAtt = addAttribute(CharTermAttribute.class);
   termAtt.resizeBuffer(ngramSize);
   // Sliding window holding the most recent ngramSize chars, primed with spaces.
   // Arrays.fill replaces the hand-rolled priming loop.
   buffer = new char[ngramSize];
   java.util.Arrays.fill(buffer, ' ');
 }
Example #4
 /**
  * Advances to the next token and stems it in place, unless the token is
  * flagged as a keyword (keywords pass through unchanged).
  */
 @Override
 public boolean incrementToken() throws IOException {
   if (!input.incrementToken()) {
     return false;
   }
   if (!keywordAttr.isKeyword()) {
     // Grow the buffer by one slot before stemming: this stemmer can lengthen
     // a word by at most one character (worst case '*çom' -> '*ción').
     final int length = termAtt.length();
     termAtt.setLength(stemmer.stem(termAtt.resizeBuffer(length + 1), length));
   }
   return true;
 }
  /**
   * Writes the joined unhyphenated term into the term attribute, restoring the
   * state saved when the hyphenated fragment was first seen, and resets the
   * {@code hyphenated} accumulator for the next word.
   */
  private void unhyphenate() {
    int endOffset = offsetAttribute.endOffset();

    restoreState(savedState);
    savedState = null;

    char term[] = termAttribute.buffer();
    int length = hyphenated.length();
    // Compare against the buffer CAPACITY (term.length), not the current token
    // length: the original check against termAttribute.length() only worked
    // indirectly (capacity >= token length) and triggered redundant resize calls.
    if (length > term.length) {
      term = termAttribute.resizeBuffer(length);
    }

    hyphenated.getChars(0, length, term, 0);
    termAttribute.setLength(length);
    // Token spans from the saved start to the end of the last joined fragment.
    offsetAttribute.setOffset(offsetAttribute.startOffset(), endOffset);
    hyphenated.setLength(0);
  }
 /**
  * Replaces each term with the binary-encoded form of its collation key, so
  * that index term order matches collator order.
  */
 @Override
 public boolean incrementToken() throws IOException {
   if (!input.incrementToken()) {
     return false;
   }
   // Compute the raw collation key for the current term text.
   char[] termChars = termAtt.buffer();
   String termText = new String(termChars, 0, termAtt.length());
   collator.getRawCollationKey(termText, reusableKey);
   // Overwrite the term with the index-safe encoding of the key bytes.
   int encodedLength =
       IndexableBinaryStringTools.getEncodedLength(reusableKey.bytes, 0, reusableKey.size);
   if (encodedLength > termChars.length) {
     termAtt.resizeBuffer(encodedLength);
   }
   termAtt.setLength(encodedLength);
   IndexableBinaryStringTools.encode(
       reusableKey.bytes, 0, reusableKey.size, termAtt.buffer(), 0, encodedLength);
   return true;
 }
 /**
  * Emits the entire input as a single token, exactly once; subsequent calls
  * return false.
  */
 @Override
 public final boolean incrementToken() throws IOException {
   if (done) {
     return false;
   }
   clearAttributes();
   done = true;
   // Slurp the whole reader into the term buffer, growing it whenever full.
   char[] buffer = termAtt.buffer();
   int upto = 0;
   int read;
   while ((read = input.read(buffer, upto, buffer.length - upto)) != -1) {
     upto += read;
     if (upto == buffer.length) {
       buffer = termAtt.resizeBuffer(1 + buffer.length);
     }
   }
   termAtt.setLength(upto);
   finalOffset = correctOffset(upto);
   offsetAtt.setOffset(correctOffset(0), finalOffset);
   return true;
 }
  /**
   * Emits the whole input as one token whose text is the pinyin rendering of
   * the original characters, exactly once; subsequent calls return false.
   * ASCII chars (&lt; 128) are copied verbatim; other chars are converted via
   * PinyinHelper. The {@code first_letter} mode ("prefix"/"append"/"none"/
   * "only") controls whether the first-letter abbreviation is prepended,
   * appended, omitted, or emitted alone.
   */
  @Override
  public final boolean incrementToken() throws IOException {
    // NOTE(review): attributes are cleared even on the already-done path,
    // unlike the sibling tokenizer that clears only before producing a token.
    clearAttributes();

    if (!done) {
      done = true;
      // Read the entire input into the term buffer, growing it as needed.
      int upto = 0;
      char[] buffer = termAtt.buffer();
      while (true) {
        final int length = input.read(buffer, upto, buffer.length - upto);
        if (length == -1) break;
        upto += length;
        if (upto == buffer.length) buffer = termAtt.resizeBuffer(1 + buffer.length);
      }
      termAtt.setLength(upto);
      // Snapshot the raw text, then reuse the term attribute for the output.
      String str = termAtt.toString();
      termAtt.setEmpty();
      StringBuilder stringBuilder = new StringBuilder();   // full pinyin (plus ASCII passthrough)
      StringBuilder firstLetters = new StringBuilder();    // first letter of each pinyin syllable
      for (int i = 0; i < str.length(); i++) {
        char c = str.charAt(i);
        if (c < 128) {
          // ASCII passes through untranslated (and contributes no first letter).
          stringBuilder.append(c);
        } else {
          try {
            String[] strs = PinyinHelper.toHanyuPinyinStringArray(c, format);
            if (strs != null) {
              // get first result by default
              String first_value = strs[0];
              // TODO more than one pinyin
              stringBuilder.append(first_value);
              if (this.padding_char.length() > 0) {
                stringBuilder.append(this.padding_char);
              }
              firstLetters.append(first_value.charAt(0));
            }
          } catch (BadHanyuPinyinOutputFormatCombination badHanyuPinyinOutputFormatCombination) {
            // NOTE(review): swallowed with printStackTrace — consider a logger;
            // the offending char is silently dropped from the output.
            badHanyuPinyinOutputFormatCombination.printStackTrace();
          }
        }
      }

      // let's join them
      if (first_letter.equals("prefix")) {
        // "prefix": abbreviation, then padding, then full pinyin.
        termAtt.append(firstLetters.toString());
        if (this.padding_char.length() > 0) {
          termAtt.append(this.padding_char); // TODO splitter
        }
        termAtt.append(stringBuilder.toString());
      } else if (first_letter.equals("append")) {
        // "append": full pinyin, then padding (unless already trailing), then abbreviation.
        termAtt.append(stringBuilder.toString());
        if (this.padding_char.length() > 0) {
          if (!stringBuilder.toString().endsWith(this.padding_char)) {
            termAtt.append(this.padding_char);
          }
        }
        termAtt.append(firstLetters.toString());
      } else if (first_letter.equals("none")) {
        // "none": full pinyin only.
        termAtt.append(stringBuilder.toString());
      } else if (first_letter.equals("only")) {
        // "only": abbreviation only.
        termAtt.append(firstLetters.toString());
      }
      // NOTE(review): any other first_letter value yields an empty token.

      // Offsets span the raw (pre-conversion) input length.
      finalOffset = correctOffset(upto);
      offsetAtt.setOffset(correctOffset(0), finalOffset);
      return true;
    }
    return false;
  }
  /**
   * Tokenizes the (jaso-decomposed) input, emitting one token per run of
   * token characters, normalized via {@code normalize(c)}.
   *
   * <p>Translated from the original Korean note: in Lucene 4.2.x,
   * {@code CharacterUtils.fill} returned true when data remained after jaso
   * (Korean consonant/vowel) decomposition, but here it returns false, so the
   * loop condition was unavoidably changed to check the ioBuffer length
   * instead (CharacterUtils.fill).
   *
   * @author 최일규 (Choi Il-gyu)
   * @since 2014-07-11
   */
  @Override
  public final boolean incrementToken() throws IOException {
    clearAttributes();

    int length = 0;          // chars buffered for the current token
    int start = -1; // this variable is always initialized
    char[] buffer = termAtt.buffer();
    while (true) {
      // Refill the I/O buffer when the previous fill has been consumed.
      if (bufferIndex >= dataLen) {

        offset += dataLen;
        // NOTE(review): isDecompose is unused — fill's return value is ignored
        // (see the Javadoc above); the length check below replaces it.
        boolean isDecompose =
            charUtils.fill(ioBuffer, jasoDecompose(input, decomposeMode, typoMode));

        // If the buffer has content, keep analyzing (until it reads empty /
        // the original comment mentions this loops like a recursive call).
        if (ioBuffer.getLength() == 0) {
          dataLen = 0; // so next offset += dataLen won't decrement offset
          if (length > 0) {
            break;
          } else {
            finalOffset = correctOffset(offset);
            return false;
          }
        }
        dataLen = ioBuffer.getLength();
        bufferIndex = 0;
      }
      // use CharacterUtils here to support < 3.1 UTF-16 code unit behavior if the char based
      // methods are gone
      final int c = charUtils.codePointAt(ioBuffer.getBuffer(), bufferIndex, dataLen);
      bufferIndex += Character.charCount(c);

      // if it's a token char
      if (isTokenChar(c)) {

        // start of token
        if (length == 0) {
          assert start == -1;
          start = offset + bufferIndex - 1;

          // check if a supplementary could run out of bounds
        } else if (length >= buffer.length - 1) {

          // make sure a supplementary fits in the buffer
          buffer = termAtt.resizeBuffer(2 + length);
        }

        // buffer it, normalized
        length += Character.toChars(normalize(c), buffer, length);
        if (length >= MAX_WORD_LEN) {
          break;
        }
      } else if (length > 0) {
        // return 'em
        break;
      }
    }

    termAtt.setLength(length);
    assert start != -1;
    offsetAtt.setOffset(correctOffset(start), finalOffset = correctOffset(start + length));
    return true;
  }