Example #1
0
 /**
  * Concatenates the saved buffer to the given WordDelimiterConcatenation
  *
  * @param concatenation WordDelimiterConcatenation to concatenate the buffer to
  */
 private void concatenate(WordDelimiterConcatenation concatenation) {
   if (concatenation.isEmpty()) {
     concatenation.startOffset = savedStartOffset + iterator.current;
   }
   concatenation.append(savedBuffer, iterator.current, iterator.end - iterator.current);
   concatenation.endOffset = savedStartOffset + iterator.end;
 }
Example #2
0
 @Override
 public void reset() throws IOException {
   super.reset();
   hasSavedState = false;
   concat.clear();
   concatAll.clear();
   accumPosInc = bufferedPos = bufferedLen = 0;
   first = true;
 }
Example #3
0
 /**
  * Flushes the given WordDelimiterConcatenation by either writing its concat and then clearing, or
  * just clearing.
  *
  * @param concatenation WordDelimiterConcatenation that will be flushed
  * @return {@code true} if the concatenation was written before it was cleared, {@code false}
  *     otherwise
  */
 private boolean flushConcatenation(WordDelimiterConcatenation concatenation) {
   lastConcatCount = concatenation.subwordCount;
   if (concatenation.subwordCount != 1 || !shouldGenerateParts(concatenation.type)) {
     concatenation.writeAndClear();
     return true;
   }
   concatenation.clear();
   return false;
 }
Example #4
0
  @Override
  public boolean incrementToken() throws IOException {
    while (true) {
      if (!hasSavedState) {
        // process a new input word
        if (!input.incrementToken()) {
          return false;
        }

        int termLength = termAttribute.length();
        char[] termBuffer = termAttribute.buffer();

        accumPosInc += posIncAttribute.getPositionIncrement();

        iterator.setText(termBuffer, termLength);
        iterator.next();

        // word of no delimiters, or protected word: just return it
        if ((iterator.current == 0 && iterator.end == termLength)
            || (protWords != null && protWords.contains(termBuffer, 0, termLength))) {
          posIncAttribute.setPositionIncrement(accumPosInc);
          accumPosInc = 0;
          first = false;
          return true;
        }

        // word of simply delimiters
        if (iterator.end == WordDelimiterIterator.DONE && !has(PRESERVE_ORIGINAL)) {
          // if the posInc is 1, simply ignore it in the accumulation
          // TODO: proper hole adjustment (FilteringTokenFilter-like) instead of this previous
          // logic!
          if (posIncAttribute.getPositionIncrement() == 1 && !first) {
            accumPosInc--;
          }
          continue;
        }

        saveState();

        hasOutputToken = false;
        hasOutputFollowingOriginal = !has(PRESERVE_ORIGINAL);
        lastConcatCount = 0;

        if (has(PRESERVE_ORIGINAL)) {
          posIncAttribute.setPositionIncrement(accumPosInc);
          accumPosInc = 0;
          first = false;
          return true;
        }
      }

      // at the end of the string, output any concatenations
      if (iterator.end == WordDelimiterIterator.DONE) {
        if (!concat.isEmpty()) {
          if (flushConcatenation(concat)) {
            buffer();
            continue;
          }
        }

        if (!concatAll.isEmpty()) {
          // only if we haven't output this same combo above!
          if (concatAll.subwordCount > lastConcatCount) {
            concatAll.writeAndClear();
            buffer();
            continue;
          }
          concatAll.clear();
        }

        if (bufferedPos < bufferedLen) {
          if (bufferedPos == 0) {
            sorter.sort(0, bufferedLen);
          }
          clearAttributes();
          restoreState(buffered[bufferedPos++]);
          if (first && posIncAttribute.getPositionIncrement() == 0) {
            // can easily happen with strange combinations (e.g. not outputting numbers, but
            // concat-all)
            posIncAttribute.setPositionIncrement(1);
          }
          first = false;
          return true;
        }

        // no saved concatenations, on to the next input word
        bufferedPos = bufferedLen = 0;
        hasSavedState = false;
        continue;
      }

      // word surrounded by delimiters: always output
      if (iterator.isSingleWord()) {
        generatePart(true);
        iterator.next();
        first = false;
        return true;
      }

      int wordType = iterator.type();

      // do we already have queued up incompatible concatenations?
      if (!concat.isEmpty() && (concat.type & wordType) == 0) {
        if (flushConcatenation(concat)) {
          hasOutputToken = false;
          buffer();
          continue;
        }
        hasOutputToken = false;
      }

      // add subwords depending upon options
      if (shouldConcatenate(wordType)) {
        if (concat.isEmpty()) {
          concat.type = wordType;
        }
        concatenate(concat);
      }

      // add all subwords (catenateAll)
      if (has(CATENATE_ALL)) {
        concatenate(concatAll);
      }

      // if we should output the word or number part
      if (shouldGenerateParts(wordType)) {
        generatePart(false);
        buffer();
      }

      iterator.next();
    }
  }