@Override
 protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
   tmpInput.reset(scratch.bytes);
   tmpInput.skipBytes(scratch.length - 4); // suggestion + separator
   scratch.length -= 5; // sep + int (1-byte separator + 4-byte weight)
   return tmpInput.readInt();
 }
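The record format this decode assumes is [suggestion bytes][1-byte separator][4-byte int weight]: it skips length - 4 to land on the weight, but trims 5 bytes off scratch so the separator goes too. Below is a minimal round-trip sketch in plain java.nio; the separator value is a made-up assumption, since the real constant is not shown in this snippet.

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class IntWeightCodec {
  static final byte SEP = 0x1f; // hypothetical separator value

  // Append a separator byte and a big-endian int weight after the suggestion.
  static byte[] encode(String suggestion, int weight) {
    byte[] text = suggestion.getBytes(StandardCharsets.UTF_8);
    ByteBuffer buf = ByteBuffer.allocate(text.length + 1 + Integer.BYTES);
    buf.put(text).put(SEP).putInt(weight);
    return buf.array();
  }

  // Mirrors the skipBytes(length - 4) + readInt() in the decode above.
  static long decodeWeight(byte[] record) {
    return ByteBuffer.wrap(record, record.length - Integer.BYTES, Integer.BYTES).getInt();
  }

  public static void main(String[] args) {
    byte[] rec = encode("lucene", 42);
    System.out.println(decodeWeight(rec)); // prints 42
  }
}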
Example 2
      /* Decodes only the term bytes of the next term.  If the caller then asks
      for metadata, i.e. docFreq, totalTermFreq, or pulls a docs-and-positions
      enum (D/&PEnum), we then (lazily) decode all metadata up to the current
      term. */
      private BytesRef _next() throws IOException {
        // System.out.println("BTR._next seg=" + segment + " this=" + this + " termCount=" +
        // state.termBlockOrd + " (vs " + blockTermCount + ")");
        if (state.termBlockOrd == blockTermCount && !nextBlock()) {
          // System.out.println("  eof");
          indexIsCurrent = false;
          return null;
        }

        // TODO: cutover to something better for these ints!  simple64?
        final int suffix = termSuffixesReader.readVInt();
        // System.out.println("  suffix=" + suffix);

        term.setLength(termBlockPrefix + suffix);
        term.grow(term.length());
        termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
        state.termBlockOrd++;

        // NOTE: meaningless in the non-ord case
        state.ord++;

        // System.out.println("  return term=" + fieldInfo.name + ":" + term.utf8ToString() + " " +
        // term + " tbOrd=" + state.termBlockOrd);
        return term.get();
      }
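The heart of _next() is the shared-prefix trick: each term in a block is stored as a (vInt length, bytes) suffix that is written over the tail of the previous term while the first termBlockPrefix bytes are left untouched. A standalone sketch of that reconstruction, using single bytes as a stand-in for the vInt lengths:

import java.io.*;
import java.nio.charset.StandardCharsets;

public class PrefixSuffixDemo {
  public static void main(String[] args) throws IOException {
    byte[] prefix = "app".getBytes(StandardCharsets.UTF_8); // termBlockPrefix = 3
    // "apple", "apply", "appro" packed as (suffixLength, suffixBytes) pairs:
    ByteArrayOutputStream blob = new ByteArrayOutputStream();
    for (String s : new String[] {"le", "ly", "ro"}) {
      byte[] b = s.getBytes(StandardCharsets.UTF_8);
      blob.write(b.length); // stand-in for the vInt suffix length
      blob.write(b);
    }
    DataInputStream in = new DataInputStream(new ByteArrayInputStream(blob.toByteArray()));
    byte[] term = new byte[prefix.length + 16];
    System.arraycopy(prefix, 0, term, 0, prefix.length);
    while (in.available() > 0) {
      int suffix = in.read();                    // suffix length
      in.readFully(term, prefix.length, suffix); // overwrite only the suffix region
      System.out.println(new String(term, 0, prefix.length + suffix, StandardCharsets.UTF_8));
    }
  }
}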
  // Interleaves all output tokens onto the futureOutputs:
  private void addOutput(BytesRef bytes, int matchInputLength, int matchEndOffset) {
    bytesReader.reset(bytes.bytes, bytes.offset, bytes.length);

    final int code = bytesReader.readVInt();
    final boolean keepOrig = (code & 0x1) == 0;
    final int count = code >>> 1;
    // System.out.println("  addOutput count=" + count + " keepOrig=" + keepOrig);
    for (int outputIDX = 0; outputIDX < count; outputIDX++) {
      synonyms.words.get(bytesReader.readVInt(), scratchBytes);
      // System.out.println("    outIDX=" + outputIDX + " bytes=" + scratchBytes.length);
      scratchChars.copyUTF8Bytes(scratchBytes);
      int lastStart = 0;
      final int chEnd = lastStart + scratchChars.length();
      int outputUpto = nextRead;
      for (int chIDX = lastStart; chIDX <= chEnd; chIDX++) {
        if (chIDX == chEnd || scratchChars.charAt(chIDX) == SynonymMap.WORD_SEPARATOR) {
          final int outputLen = chIDX - lastStart;
          // Caller is not allowed to have empty string in
          // the output:
          assert outputLen > 0 : "output contains empty string: " + scratchChars;
          final int endOffset;
          final int posLen;
          if (chIDX == chEnd && lastStart == 0) {
            // This rule had a single output token, so, we set
            // this output's endOffset to the current
            // endOffset (ie, endOffset of the last input
            // token it matched):
            endOffset = matchEndOffset;
            posLen = keepOrig ? matchInputLength : 1;
          } else {
            // This rule has more than one output token; we
            // can't pick any particular endOffset for this
            // case, so, we inherit the endOffset for the
            // input token which this output overlaps:
            endOffset = -1;
            posLen = 1;
          }
          futureOutputs[outputUpto].add(
              scratchChars.chars(), lastStart, outputLen, endOffset, posLen);
          // System.out.println("      " + new String(scratchChars.chars, lastStart, outputLen) + "
          // outputUpto=" + outputUpto);
          lastStart = 1 + chIDX;
          // System.out.println("  slot=" + outputUpto + " keepOrig=" + keepOrig);
          outputUpto = rollIncr(outputUpto);
          assert futureOutputs[outputUpto].posIncr == 1
              : "outputUpto=" + outputUpto + " vs nextWrite=" + nextWrite;
        }
      }
    }

    int upto = nextRead;
    for (int idx = 0; idx < matchInputLength; idx++) {
      futureInputs[upto].keepOrig |= keepOrig;
      futureInputs[upto].matched = true;
      upto = rollIncr(upto);
    }
  }
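The vInt unpacked at the top of addOutput() packs two values into one word: the low bit encodes keepOrig (0 means keep the original token) and the remaining bits hold the output count. A small sketch of the matching pack/unpack; pack() is an assumed writer-side helper, not taken from SynonymMap:

public class SynonymHeaderDemo {
  // Hypothetical writer side: low bit cleared means keepOrig, upper bits = count.
  static int pack(int count, boolean keepOrig) {
    return (count << 1) | (keepOrig ? 0 : 1);
  }

  public static void main(String[] args) {
    int code = pack(3, true);
    boolean keepOrig = (code & 0x1) == 0; // same test as addOutput()
    int count = code >>> 1;
    System.out.println("keepOrig=" + keepOrig + " count=" + count); // keepOrig=true count=3
  }
}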
Example 4
 /** decodes the payload at the current position */
 protected BytesRef decodePayload(BytesRef scratch, ByteArrayDataInput tmpInput) {
   tmpInput.reset(scratch.bytes);
   tmpInput.skipBytes(scratch.length - 2); // skip to payload size
   short payloadLength = tmpInput.readShort(); // read payload size
   tmpInput.setPosition(scratch.length - 2 - payloadLength); // setPosition to start of payload
   BytesRef payloadScratch = new BytesRef(payloadLength);
   tmpInput.readBytes(payloadScratch.bytes, 0, payloadLength); // read payload
   payloadScratch.length = payloadLength;
   scratch.length -= 2; // payload length info (short)
   scratch.length -= payloadLength; // payload
   return payloadScratch;
 }
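The layout behind decodePayload() places the payload after the suggestion and its length as a trailing short, so the reader jumps to the last two bytes first, then steps back to the payload start. A minimal round-trip sketch in plain java.nio:

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class PayloadCodecDemo {
  // Layout: [suggestion bytes][payload bytes][short payloadLength].
  static byte[] encode(byte[] suggestion, byte[] payload) {
    ByteBuffer buf = ByteBuffer.allocate(suggestion.length + payload.length + Short.BYTES);
    buf.put(suggestion).put(payload).putShort((short) payload.length);
    return buf.array();
  }

  static byte[] decodePayload(byte[] record) {
    ByteBuffer buf = ByteBuffer.wrap(record);
    short len = buf.getShort(record.length - Short.BYTES); // trailing size, as above
    byte[] payload = new byte[len];
    buf.position(record.length - Short.BYTES - len);       // jump to payload start
    buf.get(payload);
    return payload;
  }

  public static void main(String[] args) {
    byte[] rec = encode("term".getBytes(StandardCharsets.UTF_8),
                        "meta".getBytes(StandardCharsets.UTF_8));
    System.out.println(new String(decodePayload(rec), StandardCharsets.UTF_8)); // meta
  }
}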
Example 5
      /* Does initial decode of next block of terms; this
      doesn't actually decode the docFreq, totalTermFreq,
      postings details (frq/prx offset, etc.) metadata;
      it just loads them as byte[] blobs which are then
      decoded on-demand if the metadata is ever requested
      for any term in this block.  This enables terms-only
      intensive consumers (e.g. certain multi-term queries,
      respelling) to avoid paying the price of decoding
      metadata they won't use. */
      private boolean nextBlock() throws IOException {

        // TODO: we still lazy-decode the byte[] for each
        // term (the suffix), but, if we decoded
        // all N terms up front then seeking could do a fast
        // bsearch w/in the block...

        // System.out.println("BTR.nextBlock() fp=" + in.getFilePointer() + " this=" + this);
        state.blockFilePointer = in.getFilePointer();
        blockTermCount = in.readVInt();
        // System.out.println("  blockTermCount=" + blockTermCount);
        if (blockTermCount == 0) {
          return false;
        }
        termBlockPrefix = in.readVInt();

        // term suffixes:
        int len = in.readVInt();
        if (termSuffixes.length < len) {
          termSuffixes = new byte[ArrayUtil.oversize(len, 1)];
        }
        // System.out.println("  termSuffixes len=" + len);
        in.readBytes(termSuffixes, 0, len);
        termSuffixesReader.reset(termSuffixes, 0, len);

        // docFreq, totalTermFreq
        len = in.readVInt();
        if (docFreqBytes.length < len) {
          docFreqBytes = new byte[ArrayUtil.oversize(len, 1)];
        }
        // System.out.println("  freq bytes len=" + len);
        in.readBytes(docFreqBytes, 0, len);
        freqReader.reset(docFreqBytes, 0, len);

        // metadata
        len = in.readVInt();
        if (bytes == null) {
          bytes = new byte[ArrayUtil.oversize(len, 1)];
          bytesReader = new ByteArrayDataInput();
        } else if (bytes.length < len) {
          bytes = new byte[ArrayUtil.oversize(len, 1)];
        }
        in.readBytes(bytes, 0, len);
        bytesReader.reset(bytes, 0, len);

        metaDataUpto = 0;
        state.termBlockOrd = 0;

        indexIsCurrent = false;
        // System.out.println("  indexIsCurrent=" + indexIsCurrent);

        return true;
      }
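nextBlock() consumes a block laid out as: vInt termCount, vInt prefix length, then three length-prefixed byte[] blobs (term suffixes, doc/term freqs, postings metadata). A hypothetical writer-side sketch of that layout, with a vInt encoder matching the LSB-first, continuation-bit format readVInt() expects; this is an illustration, not the actual block writer:

import java.io.*;

public class BlockLayoutSketch {
  // Lucene-style vInt: 7 data bits per byte, low-order group first, high bit
  // set on every byte except the last.
  static void writeVInt(DataOutput out, int v) throws IOException {
    while ((v & ~0x7F) != 0) {
      out.writeByte((v & 0x7F) | 0x80);
      v >>>= 7;
    }
    out.writeByte(v);
  }

  static void writeBlob(DataOutput out, byte[] b) throws IOException {
    writeVInt(out, b.length); // matches the len = in.readVInt() reads above
    out.write(b);
  }

  static void writeBlock(DataOutput out, int termCount, int prefixLen,
                         byte[] suffixes, byte[] freqs, byte[] metadata) throws IOException {
    writeVInt(out, termCount);
    writeVInt(out, prefixLen);
    writeBlob(out, suffixes);
    writeBlob(out, freqs);
    writeBlob(out, metadata);
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    writeBlock(new DataOutputStream(bos), 2, 3,
        new byte[] {'l', 'e', 'l', 'y'}, new byte[] {1, 2}, new byte[0]);
    System.out.println(bos.size() + " bytes"); // header plus three blobs
  }
}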
 @Override
 public void setDocument(int docId) {
   bytes = values.get(docId);
   in.reset(bytes.bytes, bytes.offset, bytes.length);
   if (!in.eof()) {
     // first value uses vLong on top of zig-zag encoding, then deltas are encoded using vLong
     long previousValue = longs[0] = ByteUtils.zigZagDecode(ByteUtils.readVLong(in));
     count = 1;
     while (!in.eof()) {
       longs = ArrayUtil.grow(longs, count + 1);
       previousValue = longs[count++] = previousValue + ByteUtils.readVLong(in);
     }
   } else {
     count = 0;
   }
 }
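The stream consumed by setDocument() stores a zig-zag-encoded vLong head followed by plain vLong deltas, which works because the per-document values are sorted, so every delta is non-negative. A sketch of the zig-zag transform and the running-sum decode; the shift formulas are the standard ones, assumed to match what ByteUtils.zigZagDecode does:

public class ZigZagDeltaDemo {
  static long zigZagEncode(long l) { return (l << 1) ^ (l >> 63); } // sign bit -> low bit
  static long zigZagDecode(long l) { return (l >>> 1) ^ -(l & 1); }

  public static void main(String[] args) {
    long[] sorted = {-7, -2, 10};
    long head = zigZagEncode(sorted[0]);  // handles the possibly-negative first value
    long delta1 = sorted[1] - sorted[0];  // 5, non-negative on sorted input
    long delta2 = sorted[2] - sorted[1];  // 12
    // Decode mirrors setDocument(): head, then running sums.
    long v0 = zigZagDecode(head);
    long v1 = v0 + delta1;
    long v2 = v1 + delta2;
    System.out.println(v0 + " " + v1 + " " + v2); // -7 -2 10
  }
}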
  // Pushes a frame we seek'd to
  IDVersionSegmentTermsEnumFrame pushFrame(
      FST.Arc<Pair<BytesRef, Long>> arc, Pair<BytesRef, Long> frameData, int length)
      throws IOException {
    scratchReader.reset(
        frameData.output1.bytes, frameData.output1.offset, frameData.output1.length);
    final long code = scratchReader.readVLong();
    final long fpSeek = code >>> VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
    final IDVersionSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
    f.maxIDVersion = Long.MAX_VALUE - frameData.output2;
    f.hasTerms = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0;
    f.hasTermsOrig = f.hasTerms;
    f.isFloor = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0;
    if (f.isFloor) {
      f.setFloorData(scratchReader, frameData.output1);
    }
    pushFrame(arc, fpSeek, length);

    return f;
  }
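The vLong code split apart in pushFrame() packs a block file pointer together with two flag bits. The sketch below mirrors the constant names used above, but the values are assumptions for illustration, not read from VersionBlockTreeTermsWriter:

public class FrameCodeDemo {
  static final int OUTPUT_FLAGS_NUM_BITS = 2;   // assumed width of the flag field
  static final int OUTPUT_FLAG_HAS_TERMS = 0x2; // assumed flag values
  static final int OUTPUT_FLAG_IS_FLOOR = 0x1;

  static long pack(long fp, boolean hasTerms, boolean isFloor) {
    return (fp << OUTPUT_FLAGS_NUM_BITS)
        | (hasTerms ? OUTPUT_FLAG_HAS_TERMS : 0)
        | (isFloor ? OUTPUT_FLAG_IS_FLOOR : 0);
  }

  public static void main(String[] args) {
    long code = pack(12345L, true, false);
    // Same unpacking as pushFrame():
    System.out.println("fp=" + (code >>> OUTPUT_FLAGS_NUM_BITS)
        + " hasTerms=" + ((code & OUTPUT_FLAG_HAS_TERMS) != 0)
        + " isFloor=" + ((code & OUTPUT_FLAG_IS_FLOOR) != 0));
  }
}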
Example 8
      private void decodeMetaData() throws IOException {
        // System.out.println("BTR.decodeMetadata mdUpto=" + metaDataUpto + " vs termCount=" +
        // state.termBlockOrd + " state=" + state);
        if (!seekPending) {
          // TODO: cutover to random-access API
          // here.... really stupid that we have to decode N
          // wasted term metadata just to get to the N+1th
          // that we really need...

          // lazily catch up on metadata decode:
          final int limit = state.termBlockOrd;
          boolean absolute = metaDataUpto == 0;
          // TODO: better API would be "jump straight to term=N"???
          while (metaDataUpto < limit) {
            // System.out.println("  decode mdUpto=" + metaDataUpto);
            // TODO: we could make "tiers" of metadata, ie,
            // decode docFreq/totalTF but don't decode postings
            // metadata; this way caller could get
            // docFreq/totalTF w/o paying decode cost for
            // postings

            // TODO: if docFreq were bulk decoded we could
            // just skipN here:

            // docFreq, totalTermFreq
            state.docFreq = freqReader.readVInt();
            // System.out.println("    dF=" + state.docFreq);
            if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
              state.totalTermFreq = state.docFreq + freqReader.readVLong();
              // System.out.println("    totTF=" + state.totalTermFreq);
            }
            // metadata
            for (int i = 0; i < longs.length; i++) {
              longs[i] = bytesReader.readVLong();
            }
            postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state, absolute);
            metaDataUpto++;
            absolute = false;
          }
        } else {
          // System.out.println("  skip! seekPending");
        }
      }
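The freq encoding consumed by this loop relies on totalTermFreq always being at least docFreq, so the writer only needs to store the non-negative difference as a vLong. A one-screen sketch of that arithmetic:

public class FreqDeltaDemo {
  public static void main(String[] args) {
    int docFreq = 17;
    long totalTermFreq = 23;
    long storedDelta = totalTermFreq - docFreq; // 6: what readVLong() would return
    long decoded = docFreq + storedDelta;       // matches state.totalTermFreq above
    System.out.println(decoded); // 23
  }
}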
Example 9
 /** decodes the contexts at the current position */
 protected Set<BytesRef> decodeContexts(BytesRef scratch, ByteArrayDataInput tmpInput) {
   tmpInput.reset(scratch.bytes);
   tmpInput.skipBytes(scratch.length - 2); // skip to context set size
   short ctxSetSize = tmpInput.readShort();
   scratch.length -= 2;
   final Set<BytesRef> contextSet = new HashSet<>();
   for (short i = 0; i < ctxSetSize; i++) {
     tmpInput.setPosition(scratch.length - 2);
     short curContextLength = tmpInput.readShort();
     scratch.length -= 2;
     tmpInput.setPosition(scratch.length - curContextLength);
     BytesRef contextSpare = new BytesRef(curContextLength);
     tmpInput.readBytes(contextSpare.bytes, 0, curContextLength);
     contextSpare.length = curContextLength;
     contextSet.add(contextSpare);
     scratch.length -= curContextLength;
   }
   return contextSet;
 }
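decodeContexts() walks the record backwards: a trailing short holds the set size, and each context is stored as its bytes followed by a short length. A hypothetical encoder producing that layout (not the actual writer from the source):

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class ContextCodecDemo {
  // Layout: [suggestion][ctx bytes][short ctxLen]...[short setSize].
  static byte[] encode(byte[] suggestion, byte[][] contexts) {
    int size = suggestion.length + Short.BYTES;
    for (byte[] c : contexts) size += c.length + Short.BYTES;
    ByteBuffer buf = ByteBuffer.allocate(size);
    buf.put(suggestion);
    for (byte[] c : contexts) {
      buf.put(c).putShort((short) c.length); // each context, then its length
    }
    buf.putShort((short) contexts.length);   // set size goes last
    return buf.array();
  }

  public static void main(String[] args) {
    byte[] rec = encode("term".getBytes(StandardCharsets.UTF_8),
        new byte[][] {"ctx1".getBytes(StandardCharsets.UTF_8)});
    System.out.println(rec.length + " bytes"); // 4 + (4 + 2) + 2 = 12
  }
}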
 @SuppressWarnings("unused")
 private void printSeekState(PrintStream out) throws IOException {
   if (currentFrame == staticFrame) {
     out.println("  no prior seek");
   } else {
     out.println("  prior seek state:");
     int ord = 0;
     boolean isSeekFrame = true;
     while (true) {
       IDVersionSegmentTermsEnumFrame f = getFrame(ord);
       assert f != null;
       final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefix);
       if (f.nextEnt == -1) {
         out.println(
             "    frame "
                 + (isSeekFrame ? "(seek)" : "(next)")
                 + " ord="
                 + ord
                 + " fp="
                 + f.fp
                 + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
                 + " prefixLen="
                 + f.prefix
                 + " prefix="
                 + brToString(prefix)
                 + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
                 + " hasTerms="
                 + f.hasTerms
                 + " isFloor="
                 + f.isFloor
                 + " code="
                 + ((f.fp << VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS)
                     + (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0)
                     + (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0))
                 + " isLastInFloor="
                 + f.isLastInFloor
                 + " mdUpto="
                 + f.metaDataUpto
                 + " tbOrd="
                 + f.getTermBlockOrd());
       } else {
         out.println(
             "    frame "
                 + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)")
                 + " ord="
                 + ord
                 + " fp="
                 + f.fp
                 + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
                 + " prefixLen="
                 + f.prefix
                 + " prefix="
                 + brToString(prefix)
                 + " nextEnt="
                 + f.nextEnt
                 + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
                 + " hasTerms="
                 + f.hasTerms
                 + " isFloor="
                 + f.isFloor
                 + " code="
                 + ((f.fp << VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS)
                     + (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0)
                     + (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0))
                 + " lastSubFP="
                 + f.lastSubFP
                 + " isLastInFloor="
                 + f.isLastInFloor
                 + " mdUpto="
                 + f.metaDataUpto
                 + " tbOrd="
                 + f.getTermBlockOrd());
       }
       if (fr.index != null) {
         assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
         if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.byteAt(f.prefix - 1) & 0xFF)) {
           out.println(
               "      broken seek state: arc.label="
                   + (char) f.arc.label
                   + " vs term byte="
                   + (char) (term.byteAt(f.prefix - 1) & 0xFF));
           throw new RuntimeException("seek state is broken");
         }
         Pair<BytesRef, Long> output = Util.get(fr.index, prefix);
         if (output == null) {
           out.println("      broken seek state: prefix is not final in index");
           throw new RuntimeException("seek state is broken");
         } else if (isSeekFrame && !f.isFloor) {
           final ByteArrayDataInput reader =
               new ByteArrayDataInput(
                   output.output1.bytes, output.output1.offset, output.output1.length);
           final long codeOrig = reader.readVLong();
           final long code =
               (f.fp << VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS)
                   | (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0)
                   | (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0);
           if (codeOrig != code) {
             out.println(
                 "      broken seek state: output code="
                     + codeOrig
                     + " doesn't match frame code="
                     + code);
             throw new RuntimeException("seek state is broken");
           }
         }
       }
       if (f == currentFrame) {
         break;
       }
       if (f.prefix == validIndexPrefix) {
         isSeekFrame = false;
       }
       ord++;
     }
   }
 }
Example 11
 /** decodes the weight at the current position */
 protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
   tmpInput.reset(scratch.bytes);
   tmpInput.skipBytes(scratch.length - 8); // suggestion
   scratch.length -= 8; // long
   return tmpInput.readLong();
 }
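Here the weight is simply the last 8 bytes of the record with no separator, so the decode skips scratch.length - 8 and reads a long. A round-trip sketch in plain java.nio:

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class LongWeightCodecDemo {
  // Layout: [suggestion bytes][8-byte long weight].
  static byte[] encode(String suggestion, long weight) {
    byte[] text = suggestion.getBytes(StandardCharsets.UTF_8);
    return ByteBuffer.allocate(text.length + Long.BYTES).put(text).putLong(weight).array();
  }

  public static void main(String[] args) {
    byte[] rec = encode("lucene", 7L);
    long weight = ByteBuffer.wrap(rec).getLong(rec.length - Long.BYTES);
    System.out.println(weight); // 7
  }
}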
  @Override
  public void build(TermFreqIterator tfit) throws IOException {
    if (tfit instanceof TermFreqPayloadIterator) {
      throw new IllegalArgumentException("this suggester doesn't support payloads");
    }
    File tempInput =
        File.createTempFile(
            FSTCompletionLookup.class.getSimpleName(), ".input", Sort.defaultTempDir());
    File tempSorted =
        File.createTempFile(
            FSTCompletionLookup.class.getSimpleName(), ".sorted", Sort.defaultTempDir());

    Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
    Sort.ByteSequencesReader reader = null;
    ExternalRefSorter sorter = null;

    // Push the weights up front, before the sequences, so the sort orders by
    // weight first. For now, assume they are non-negative. If negative weights
    // were allowed, some trickery would be needed to preserve their byte order.
    boolean success = false;
    try {
      byte[] buffer = new byte[0];
      ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
      BytesRef spare;
      while ((spare = tfit.next()) != null) {
        if (spare.length + 4 >= buffer.length) {
          buffer = ArrayUtil.grow(buffer, spare.length + 4);
        }

        output.reset(buffer);
        output.writeInt(encodeWeight(tfit.weight()));
        output.writeBytes(spare.bytes, spare.offset, spare.length);
        writer.write(buffer, 0, output.getPosition());
      }
      writer.close();

      // We don't know the distribution of scores and we need to bucket them, so we'll sort
      // and divide into equal buckets.
      SortInfo info = new Sort().sort(tempInput, tempSorted);
      tempInput.delete();
      FSTCompletionBuilder builder =
          new FSTCompletionBuilder(
              buckets, sorter = new ExternalRefSorter(new Sort()), sharedTailLength);

      final int inputLines = info.lines;
      reader = new Sort.ByteSequencesReader(tempSorted);
      long line = 0;
      int previousBucket = 0;
      int previousScore = 0;
      ByteArrayDataInput input = new ByteArrayDataInput();
      BytesRef tmp1 = new BytesRef();
      BytesRef tmp2 = new BytesRef();
      while (reader.read(tmp1)) {
        input.reset(tmp1.bytes);
        int currentScore = input.readInt();

        int bucket;
        if (line > 0 && currentScore == previousScore) {
          bucket = previousBucket;
        } else {
          bucket = (int) (line * buckets / inputLines);
        }
        previousScore = currentScore;
        previousBucket = bucket;

        // Only append the input, discard the weight.
        tmp2.bytes = tmp1.bytes;
        tmp2.offset = input.getPosition();
        tmp2.length = tmp1.length - input.getPosition();
        builder.add(tmp2, bucket);

        line++;
      }

      // The two FSTCompletions share the same automaton.
      this.higherWeightsCompletion = builder.build();
      this.normalCompletion =
          new FSTCompletion(higherWeightsCompletion.getFST(), false, exactMatchFirst);

      success = true;
    } finally {
      if (success) IOUtils.close(reader, writer, sorter);
      else IOUtils.closeWhileHandlingException(reader, writer, sorter);

      tempInput.delete();
      tempSorted.delete();
    }
  }
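The bucket assignment inside the read loop maps sorted record number line (out of inputLines) onto one of buckets equal-size weight buckets, and reuses the previous bucket whenever scores tie so equal weights never straddle a bucket boundary. A tiny sketch of just that rule:

public class BucketRuleDemo {
  public static void main(String[] args) {
    int buckets = 10, inputLines = 100;
    for (long line : new long[] {0, 49, 99}) {
      System.out.println("line " + line + " -> bucket "
          + (int) (line * buckets / inputLines)); // 0 -> 0, 49 -> 4, 99 -> 9
    }
  }
}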
Example 13
      // TODO: we may want an alternate mode here which is
      // "if you are about to return NOT_FOUND I won't use
      // the terms data from that"; eg FuzzyTermsEnum will
      // (usually) just immediately call seek again if we
      // return NOT_FOUND so it's a waste for us to fill in
      // the term that was actually NOT_FOUND
      @Override
      public SeekStatus seekCeil(final BytesRef target) throws IOException {

        if (indexEnum == null) {
          throw new IllegalStateException("terms index was not loaded");
        }

        // System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" +
        // target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term()
        // + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending="
        // + seekPending + " divisor=" + indexReader.getDivisor() + " this="  + this);
        if (didIndexNext) {
          if (nextIndexTerm == null) {
            // System.out.println("  nextIndexTerm=null");
          } else {
            // System.out.println("  nextIndexTerm=" + nextIndexTerm.utf8ToString());
          }
        }

        boolean doSeek = true;

        // See if we can avoid seeking, because target term
        // is after current term but before next index term:
        if (indexIsCurrent) {

          final int cmp = BytesRef.getUTF8SortedAsUnicodeComparator().compare(term.get(), target);

          if (cmp == 0) {
            // Already at the requested term
            return SeekStatus.FOUND;
          } else if (cmp < 0) {

            // Target term is after current term
            if (!didIndexNext) {
              if (indexEnum.next() == -1) {
                nextIndexTerm = null;
              } else {
                nextIndexTerm = indexEnum.term();
              }
              // System.out.println("  now do index next() nextIndexTerm=" + (nextIndexTerm == null
              // ? "null" : nextIndexTerm.utf8ToString()));
              didIndexNext = true;
            }

            if (nextIndexTerm == null
                || BytesRef.getUTF8SortedAsUnicodeComparator().compare(target, nextIndexTerm) < 0) {
              // Optimization: requested term is within the
              // same term block we are now in; skip seeking
              // (but do scanning):
              doSeek = false;
              // System.out.println("  skip seek: nextIndexTerm=" + (nextIndexTerm == null ? "null"
              // : nextIndexTerm.utf8ToString()));
            }
          }
        }

        if (doSeek) {
          // System.out.println("  seek");

          // Ask terms index to find biggest indexed term (=
          // first term in a block) that's <= our text:
          in.seek(indexEnum.seek(target));
          boolean result = nextBlock();

          // Block must exist since, at least, the indexed term
          // is in the block:
          assert result;

          indexIsCurrent = true;
          didIndexNext = false;

          if (doOrd) {
            state.ord = indexEnum.ord() - 1;
          }

          term.copyBytes(indexEnum.term());
          // System.out.println("  seek: term=" + term.utf8ToString());
        } else {
          // System.out.println("  skip seek");
          if (state.termBlockOrd == blockTermCount && !nextBlock()) {
            indexIsCurrent = false;
            return SeekStatus.END;
          }
        }

        seekPending = false;

        int common = 0;

        // Scan within block.  We could do this by calling
        // _next() and testing the resulting term, but this
        // is wasteful.  Instead, we first confirm the
        // target matches the common prefix of this block,
        // and then we scan the term bytes directly from the
        // termSuffixesReader's byte[], saving a copy into
        // the BytesRef term per term.  Only when we return
        // do we then copy the bytes into the term.

        while (true) {

          // First, see if target term matches common prefix
          // in this block:
          if (common < termBlockPrefix) {
            final int cmp =
                (term.byteAt(common) & 0xFF) - (target.bytes[target.offset + common] & 0xFF);
            if (cmp < 0) {

              // TODO: maybe we should store common prefix
              // in block header?  (instead of relying on
              // last term of previous block)

              // Target's prefix is after the common block
              // prefix, so term cannot be in this block
              // but it could be in next block.  We
              // must scan to end-of-block to set common
              // prefix for next block:
              if (state.termBlockOrd < blockTermCount) {
                while (state.termBlockOrd < blockTermCount - 1) {
                  state.termBlockOrd++;
                  state.ord++;
                  termSuffixesReader.skipBytes(termSuffixesReader.readVInt());
                }
                final int suffix = termSuffixesReader.readVInt();
                term.setLength(termBlockPrefix + suffix);
                term.grow(term.length());
                termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
              }
              state.ord++;

              if (!nextBlock()) {
                indexIsCurrent = false;
                return SeekStatus.END;
              }
              common = 0;

            } else if (cmp > 0) {
              // Target's prefix is before the common prefix
              // of this block, so we position to start of
              // block and return NOT_FOUND:
              assert state.termBlockOrd == 0;

              final int suffix = termSuffixesReader.readVInt();
              term.setLength(termBlockPrefix + suffix);
              term.grow(term.length());
              termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
              return SeekStatus.NOT_FOUND;
            } else {
              common++;
            }

            continue;
          }

          // Test every term in this block
          while (true) {
            state.termBlockOrd++;
            state.ord++;

            final int suffix = termSuffixesReader.readVInt();

            // We know the prefix matches, so just compare the new suffix:
            final int termLen = termBlockPrefix + suffix;
            int bytePos = termSuffixesReader.getPosition();

            boolean next = false;
            final int limit = target.offset + (termLen < target.length ? termLen : target.length);
            int targetPos = target.offset + termBlockPrefix;
            while (targetPos < limit) {
              final int cmp = (termSuffixes[bytePos++] & 0xFF) - (target.bytes[targetPos++] & 0xFF);
              if (cmp < 0) {
                // Current term is still before the target;
                // keep scanning
                next = true;
                break;
              } else if (cmp > 0) {
                // Done!  Current term is after target. Stop
                // here, fill in real term, return NOT_FOUND.
                term.setLength(termBlockPrefix + suffix);
                term.grow(term.length());
                termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
                // System.out.println("  NOT_FOUND");
                return SeekStatus.NOT_FOUND;
              }
            }

            if (!next && target.length <= termLen) {
              term.setLength(termBlockPrefix + suffix);
              term.grow(term.length());
              termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);

              if (target.length == termLen) {
                // Done!  Exact match.  Stop here, fill in
                // real term, return FOUND.
                // System.out.println("  FOUND");
                return SeekStatus.FOUND;
              } else {
                // System.out.println("  NOT_FOUND");
                return SeekStatus.NOT_FOUND;
              }
            }

            if (state.termBlockOrd == blockTermCount) {
              // Must pre-fill term for next block's common prefix
              term.setLength(termBlockPrefix + suffix);
              term.grow(term.length());
              termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
              break;
            } else {
              termSuffixesReader.skipBytes(suffix);
            }
          }

          // The purpose of the terms dict index is to seek
          // the enum to the closest index term before the
          // term we are looking for.  So, we should never
          // cross another index term (besides the first
          // one) while we are scanning:

          assert indexIsCurrent;

          if (!nextBlock()) {
            // System.out.println("  END");
            indexIsCurrent = false;
            return SeekStatus.END;
          }
          common = 0;
        }
      }
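All of the byte comparisons in seekCeil() mask with 0xFF because Java bytes are signed while terms must sort by unsigned byte value. A self-contained sketch of that comparison, including the shorter-sorts-first tie-break used when one term is a prefix of the other:

public class UnsignedCompareDemo {
  static int compare(byte[] a, byte[] b) {
    int limit = Math.min(a.length, b.length);
    for (int i = 0; i < limit; i++) {
      int cmp = (a[i] & 0xFF) - (b[i] & 0xFF); // same masking as the block scan
      if (cmp != 0) return cmp;
    }
    return a.length - b.length; // equal prefix: the shorter term sorts first
  }

  public static void main(String[] args) {
    // 0xe9 as a signed byte is negative, but unsigned it is 233 > 'a' (97):
    System.out.println(compare(new byte[] {(byte) 0xe9}, new byte[] {0x61})); // positive
  }
}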