/* Decodes only the term bytes of the next term. If the caller then asks for
 * metadata, i.e. docFreq, totalTermFreq, or pulls a DocsEnum/DocsAndPositionsEnum,
 * we then (lazily) decode all metadata up to the current term. */
private BytesRef _next() throws IOException {
  if (state.termBlockOrd == blockTermCount && !nextBlock()) {
    indexIsCurrent = false;
    return null;
  }

  // TODO: cutover to something better for these ints! simple64?
  final int suffix = termSuffixesReader.readVInt();
  term.setLength(termBlockPrefix + suffix);
  term.grow(term.length());
  termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
  state.termBlockOrd++;

  // NOTE: meaningless in the non-ord case
  state.ord++;

  return term.get();
}
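// Caller-side sketch (illustrative, not part of this file): because _next()
// decodes only term bytes, an enumeration that inspects metadata for just a
// few terms pays the lazy metadata decode only for those terms. 'terms' is an
// assumed org.apache.lucene.index.Terms instance; depending on the Lucene
// version, Terms.iterator may take a reuse argument.
private static void exampleLazyIteration(Terms terms) throws IOException {
  TermsEnum termsEnum = terms.iterator();
  BytesRef t;
  while ((t = termsEnum.next()) != null) { // term bytes only; metadata untouched
    if (t.length > 0 && t.bytes[t.offset] == 'a') {
      // First metadata access triggers the lazy decode up to the current term:
      System.out.println(t.utf8ToString() + " docFreq=" + termsEnum.docFreq());
    }
  }
}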
/**
 * Optimized version of {@link #seekExact(BytesRef)} that can sometimes fail fast if the version
 * indexed with the requested ID is less than the specified minIDVersion. Applications that index
 * a monotonically increasing global version with each document can use this for fast optimistic
 * concurrency.
 */
public boolean seekExact(final BytesRef target, long minIDVersion) throws IOException {

  if (fr.index == null) {
    throw new IllegalStateException("terms index was not loaded");
  }

  term.grow(1 + target.length);

  assert clearEOF();

  FST.Arc<Pair<BytesRef, Long>> arc;
  int targetUpto;
  Pair<BytesRef, Long> output;

  long startFrameFP = currentFrame.fp;

  targetBeforeCurrentLength = currentFrame.ord;

  boolean changed = false;

  // TODO: we could stop earlier w/ the version check: every time we traverse an index arc we
  // could check?

  if (currentFrame != staticFrame) {

    // We are already seek'd; find the common
    // prefix of new seek term vs current term and
    // re-use the corresponding seek state. For
    // example, if app first seeks to foobar, then
    // seeks to foobaz, we can re-use the seek state
    // for the first 5 bytes.

    arc = arcs[0];
    assert arc.isFinal();
    output = arc.output;
    targetUpto = 0;

    IDVersionSegmentTermsEnumFrame lastFrame = stack[0];
    assert validIndexPrefix <= term.length()
        : "validIndexPrefix=" + validIndexPrefix
            + " term.length=" + term.length()
            + " seg=" + fr.parent;

    final int targetLimit = Math.min(target.length, validIndexPrefix);

    int cmp = 0;

    // TODO: reverse vLong byte order for better FST
    // prefix output sharing

    // First compare up to valid seek frames:
    while (targetUpto < targetLimit) {
      cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
      if (cmp != 0) {
        break;
      }
      arc = arcs[1 + targetUpto];
      assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF)
          : "arc.label=" + (char) arc.label
              + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
      if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) {
        output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
      }
      if (arc.isFinal()) {
        lastFrame = stack[1 + lastFrame.ord];
      }
      targetUpto++;
    }

    if (cmp == 0) {
      final int targetUptoMid = targetUpto;

      // Second compare the rest of the term, but
      // don't save arc/output/frame; we only do this
      // to find out if the target term is before,
      // equal or after the current term
      final int targetLimit2 = Math.min(target.length, term.length());
      while (targetUpto < targetLimit2) {
        cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
        if (cmp != 0) {
          break;
        }
        targetUpto++;
      }

      if (cmp == 0) {
        cmp = term.length() - target.length;
      }
      targetUpto = targetUptoMid;
    }

    if (cmp < 0) {
      // Common case: target term is after current
      // term, ie, app is seeking multiple terms
      // in sorted order
      currentFrame = lastFrame;

    } else if (cmp > 0) {
      // Uncommon case: target term
      // is before current term; this means we can
      // keep the currentFrame but we must rewind it
      // (so we scan from the start)
      targetBeforeCurrentLength = 0;
      changed = true;
      currentFrame = lastFrame;
      currentFrame.rewind();
    } else {
      // Target is exactly the same as current term
      assert term.length() == target.length;
      if (termExists) {

        if (currentFrame.maxIDVersion < minIDVersion) {
          // The max version for all terms in this block is lower than the minVersion
          return false;
        }

        currentFrame.decodeMetaData();
        if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
          // This term's version is lower than the minVersion
          return false;
        }

        return true;
      }
      // Target is same as current but the term doesn't exist;
      // fall through and re-walk the index below.
    }

  } else {

    targetBeforeCurrentLength = -1;
    arc = fr.index.getFirstArc(arcs[0]);

    // Empty string prefix must have an output (block) in the index!
    assert arc.isFinal();
    assert arc.output != null;

    output = arc.output;

    currentFrame = staticFrame;

    targetUpto = 0;
    currentFrame =
        pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
  }

  // We are done sharing the common prefix with the incoming target and where we are currently
  // seek'd; now continue walking the index:
  while (targetUpto < target.length) {

    final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;

    final FST.Arc<Pair<BytesRef, Long>> nextArc =
        fr.index.findTargetArc(targetLabel, arc, getArc(1 + targetUpto), fstReader);

    if (nextArc == null) {

      // Index is exhausted
      validIndexPrefix = currentFrame.prefix;

      currentFrame.scanToFloorFrame(target);

      if (!currentFrame.hasTerms) {
        termExists = false;
        term.setByteAt(targetUpto, (byte) targetLabel);
        term.setLength(1 + targetUpto);
        return false;
      }

      if (currentFrame.maxIDVersion < minIDVersion) {
        // The max version for all terms in this block is lower than the minVersion
        if (currentFrame.fp != startFrameFP || changed) {
          termExists = false;
          term.setByteAt(targetUpto, (byte) targetLabel);
          term.setLength(1 + targetUpto);
          validIndexPrefix = Math.min(validIndexPrefix, term.length());
        }
        return false;
      }

      currentFrame.loadBlock();

      final SeekStatus result = currentFrame.scanToTerm(target, true);
      if (result == SeekStatus.FOUND) {
        currentFrame.decodeMetaData();
        if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
          // This term's version is lower than the minVersion
          return false;
        }
        return true;
      } else {
        return false;
      }
    } else {
      // Follow this arc
      arc = nextArc;
      if (term.byteAt(targetUpto) != (byte) targetLabel) {
        changed = true;
        term.setByteAt(targetUpto, (byte) targetLabel);
        termExists = false;
      }

      // Aggregate output as we go:
      assert arc.output != null;
      if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) {
        output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
      }

      targetUpto++;

      if (arc.isFinal()) {
        currentFrame =
            pushFrame(
                arc,
                VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput),
                targetUpto);
      }
    }
  }

  validIndexPrefix = currentFrame.prefix;

  currentFrame.scanToFloorFrame(target);

  // Target term is entirely contained in the index:
  if (!currentFrame.hasTerms) {
    termExists = false;
    term.setLength(targetUpto);
    return false;
  }

  if (currentFrame.maxIDVersion < minIDVersion) {
    // The max version for all terms in this block is lower than the minVersion
    termExists = false;
    term.setLength(targetUpto);
    return false;
  }

  currentFrame.loadBlock();

  final SeekStatus result = currentFrame.scanToTerm(target, true);
  if (result == SeekStatus.FOUND) {
    currentFrame.decodeMetaData();
    if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
      // This term's version is lower than the minVersion
      return false;
    }
    return true;
  } else {
    return false;
  }
}
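// Hedged usage sketch (not part of this class): the optimistic-concurrency
// pattern this method enables. Assumes the ID field was written with
// IDVersionPostingsFormat so its TermsEnum is an IDVersionSegmentTermsEnum;
// 'terms', 'idTerm', and 'lastSeenVersion' are illustrative names.
private static boolean exampleIsStale(Terms terms, BytesRef idTerm, long lastSeenVersion)
    throws IOException {
  IDVersionSegmentTermsEnum idEnum = (IDVersionSegmentTermsEnum) terms.iterator();
  // true => some document with this ID carries a version >= lastSeenVersion + 1,
  // i.e. another writer got there first and our cached copy is stale; false is
  // the fail-fast path (no newer version, or the ID is absent), so an
  // optimistic update may proceed:
  return idEnum.seekExact(idTerm, lastSeenVersion + 1);
}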
/** Builds a {@link SynonymMap} and returns it. */
public SynonymMap build() throws IOException {
  ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
  // TODO: are we using the best sharing options?
  org.apache.lucene.util.fst.Builder<BytesRef> builder =
      new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE4, outputs);

  BytesRefBuilder scratch = new BytesRefBuilder();
  ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput();

  final Set<Integer> dedupSet;
  if (dedup) {
    dedupSet = new HashSet<>();
  } else {
    dedupSet = null;
  }

  final byte[] spare = new byte[5];

  Set<CharsRef> keys = workingSet.keySet();
  CharsRef[] sortedKeys = keys.toArray(new CharsRef[keys.size()]);
  Arrays.sort(sortedKeys, CharsRef.getUTF16SortedAsUTF8Comparator());

  final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();

  for (int keyIdx = 0; keyIdx < sortedKeys.length; keyIdx++) {
    CharsRef input = sortedKeys[keyIdx];
    MapEntry output = workingSet.get(input);

    int numEntries = output.ords.size();
    // Output size, assuming the worst case: up to 5 bytes (max vInt) for the
    // entry count plus 5 bytes for each ord:
    int estimatedSize = 5 + numEntries * 5;
    scratch.grow(estimatedSize);
    scratchOutput.reset(scratch.bytes());

    // Now write our output data:
    int count = 0;
    for (int i = 0; i < numEntries; i++) {
      if (dedupSet != null) {
        // Box once
        final Integer ent = output.ords.get(i);
        if (dedupSet.contains(ent)) {
          continue;
        }
        dedupSet.add(ent);
      }
      scratchOutput.writeVInt(output.ords.get(i));
      count++;
    }

    final int pos = scratchOutput.getPosition();
    scratchOutput.writeVInt(count << 1 | (output.includeOrig ? 0 : 1));
    final int pos2 = scratchOutput.getPosition();
    final int vIntLen = pos2 - pos;

    // Move the count + includeOrig to the front of the byte[]:
    System.arraycopy(scratch.bytes(), pos, spare, 0, vIntLen);
    System.arraycopy(scratch.bytes(), 0, scratch.bytes(), vIntLen, pos);
    System.arraycopy(spare, 0, scratch.bytes(), 0, vIntLen);

    if (dedupSet != null) {
      dedupSet.clear();
    }

    scratch.setLength(scratchOutput.getPosition());
    builder.add(Util.toUTF32(input, scratchIntsRef), scratch.toBytesRef());
  }

  FST<BytesRef> fst = builder.finish();
  return new SynonymMap(fst, words, maxHorizontalContext);
}
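// Usage sketch (illustrative; not from the original source): the typical way
// build() is reached. Create a Builder, add single-token rules via the public
// add(CharsRef input, CharsRef output, boolean includeOrig) API, then build.
// The boolean constructor flag controls the dedup behavior seen above.
private static SynonymMap exampleBuild() throws IOException {
  SynonymMap.Builder builder = new SynonymMap.Builder(true); // dedup = true
  builder.add(new CharsRef("fast"), new CharsRef("quick"), true);  // keep original
  builder.add(new CharsRef("jump"), new CharsRef("leap"), false);  // replace original
  // The resulting map can back a SynonymFilter/SynonymGraphFilter in an
  // analysis chain:
  return builder.build();
}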
// TODO: we may want an alternate mode here which is
// "if you are about to return NOT_FOUND I won't use
// the terms data from that"; eg FuzzyTermsEnum will
// (usually) just immediately call seek again if we
// return NOT_FOUND so it's a waste for us to fill in
// the term that was actually NOT_FOUND
@Override
public SeekStatus seekCeil(final BytesRef target) throws IOException {

  if (indexEnum == null) {
    throw new IllegalStateException("terms index was not loaded");
  }

  boolean doSeek = true;

  // See if we can avoid seeking, because target term
  // is after current term but before next index term:
  if (indexIsCurrent) {
    final int cmp = BytesRef.getUTF8SortedAsUnicodeComparator().compare(term.get(), target);

    if (cmp == 0) {
      // Already at the requested term
      return SeekStatus.FOUND;
    } else if (cmp < 0) {
      // Target term is after current term
      if (!didIndexNext) {
        if (indexEnum.next() == -1) {
          nextIndexTerm = null;
        } else {
          nextIndexTerm = indexEnum.term();
        }
        didIndexNext = true;
      }

      if (nextIndexTerm == null
          || BytesRef.getUTF8SortedAsUnicodeComparator().compare(target, nextIndexTerm) < 0) {
        // Optimization: requested term is within the
        // same term block we are now in; skip seeking
        // (but do scanning):
        doSeek = false;
      }
    }
  }

  if (doSeek) {
    // Ask terms index to find biggest indexed term (=
    // first term in a block) that's <= our text:
    in.seek(indexEnum.seek(target));
    boolean result = nextBlock();

    // Block must exist since, at least, the indexed term
    // is in the block:
    assert result;

    indexIsCurrent = true;
    didIndexNext = false;

    if (doOrd) {
      state.ord = indexEnum.ord() - 1;
    }

    term.copyBytes(indexEnum.term());
  } else {
    if (state.termBlockOrd == blockTermCount && !nextBlock()) {
      indexIsCurrent = false;
      return SeekStatus.END;
    }
  }

  seekPending = false;

  int common = 0;

  // Scan within block. We could do this by calling
  // _next() and testing the resulting term, but this
  // is wasteful. Instead, we first confirm the
  // target matches the common prefix of this block,
  // and then we scan the term bytes directly from the
  // termSuffixesReader's byte[], saving a copy into
  // the BytesRef term per term. Only when we return
  // do we then copy the bytes into the term.

  while (true) {

    // First, see if target term matches common prefix
    // in this block:
    if (common < termBlockPrefix) {
      final int cmp =
          (term.byteAt(common) & 0xFF) - (target.bytes[target.offset + common] & 0xFF);
      if (cmp < 0) {

        // TODO: maybe we should store common prefix
        // in block header? (instead of relying on
        // last term of previous block)

        // Target's prefix is after the common block
        // prefix, so term cannot be in this block
        // but it could be in next block. We
        // must scan to end-of-block to set common
        // prefix for next block:
        if (state.termBlockOrd < blockTermCount) {
          while (state.termBlockOrd < blockTermCount - 1) {
            state.termBlockOrd++;
            state.ord++;
            termSuffixesReader.skipBytes(termSuffixesReader.readVInt());
          }
          final int suffix = termSuffixesReader.readVInt();
          term.setLength(termBlockPrefix + suffix);
          term.grow(term.length());
          termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
        }
        state.ord++;

        if (!nextBlock()) {
          indexIsCurrent = false;
          return SeekStatus.END;
        }
        common = 0;
      } else if (cmp > 0) {
        // Target's prefix is before the common prefix
        // of this block, so we position to start of
        // block and return NOT_FOUND:
        assert state.termBlockOrd == 0;

        final int suffix = termSuffixesReader.readVInt();
        term.setLength(termBlockPrefix + suffix);
        term.grow(term.length());
        termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
        return SeekStatus.NOT_FOUND;
      } else {
        common++;
      }

      continue;
    }

    // Test every term in this block
    while (true) {
      state.termBlockOrd++;
      state.ord++;

      final int suffix = termSuffixesReader.readVInt();

      // We know the prefix matches, so just compare the new suffix:
      final int termLen = termBlockPrefix + suffix;

      int bytePos = termSuffixesReader.getPosition();

      boolean next = false;
      final int limit = target.offset + (termLen < target.length ? termLen : target.length);
      int targetPos = target.offset + termBlockPrefix;
      while (targetPos < limit) {
        final int cmp = (termSuffixes[bytePos++] & 0xFF) - (target.bytes[targetPos++] & 0xFF);
        if (cmp < 0) {
          // Current term is still before the target;
          // keep scanning
          next = true;
          break;
        } else if (cmp > 0) {
          // Done! Current term is after target. Stop
          // here, fill in real term, return NOT_FOUND.
          term.setLength(termBlockPrefix + suffix);
          term.grow(term.length());
          termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
          return SeekStatus.NOT_FOUND;
        }
      }

      if (!next && target.length <= termLen) {
        term.setLength(termBlockPrefix + suffix);
        term.grow(term.length());
        termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);

        if (target.length == termLen) {
          // Done! Exact match. Stop here, fill in
          // real term, return FOUND.
          return SeekStatus.FOUND;
        } else {
          return SeekStatus.NOT_FOUND;
        }
      }

      if (state.termBlockOrd == blockTermCount) {
        // Must pre-fill term for next block's common prefix
        term.setLength(termBlockPrefix + suffix);
        term.grow(term.length());
        termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
        break;
      } else {
        termSuffixesReader.skipBytes(suffix);
      }
    }

    // The purpose of the terms dict index is to seek
    // the enum to the closest index term before the
    // term we are looking for. So, we should never
    // cross another index term (besides the first
    // one) while we are scanning:
    assert indexIsCurrent;

    if (!nextBlock()) {
      indexIsCurrent = false;
      return SeekStatus.END;
    }
    common = 0;
  }
}
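// Illustrative caller pattern (assumed names): issuing seeks in sorted order
// lets seekCeil take the "skip seek, scan within block" fast path whenever the
// next target still falls before the next indexed term.
private static void exampleSortedSeeks(TermsEnum termsEnum, List<BytesRef> sortedTargets)
    throws IOException {
  for (BytesRef target : sortedTargets) {
    TermsEnum.SeekStatus status = termsEnum.seekCeil(target);
    if (status == TermsEnum.SeekStatus.END) {
      break; // enum exhausted; all remaining targets are past the last term
    }
    // FOUND: exact match; NOT_FOUND: positioned on the smallest term > target.
    System.out.println(target.utf8ToString() + " -> " + termsEnum.term().utf8ToString());
  }
}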
@Override
public Fields get(int doc) throws IOException {
  SortedMap<String, SimpleTVTerms> fields = new TreeMap<>();
  in.seek(offsets[doc]);
  readLine();
  assert StringHelper.startsWith(scratch.get(), NUMFIELDS);
  int numFields = parseIntAt(NUMFIELDS.length);
  if (numFields == 0) {
    return null; // no vectors for this doc
  }
  for (int i = 0; i < numFields; i++) {
    readLine();
    assert StringHelper.startsWith(scratch.get(), FIELD);
    // skip fieldNumber:
    parseIntAt(FIELD.length);

    readLine();
    assert StringHelper.startsWith(scratch.get(), FIELDNAME);
    String fieldName = readString(FIELDNAME.length, scratch);

    readLine();
    assert StringHelper.startsWith(scratch.get(), FIELDPOSITIONS);
    boolean positions = Boolean.parseBoolean(readString(FIELDPOSITIONS.length, scratch));

    readLine();
    assert StringHelper.startsWith(scratch.get(), FIELDOFFSETS);
    boolean offsets = Boolean.parseBoolean(readString(FIELDOFFSETS.length, scratch));

    readLine();
    assert StringHelper.startsWith(scratch.get(), FIELDPAYLOADS);
    boolean payloads = Boolean.parseBoolean(readString(FIELDPAYLOADS.length, scratch));

    readLine();
    assert StringHelper.startsWith(scratch.get(), FIELDTERMCOUNT);
    int termCount = parseIntAt(FIELDTERMCOUNT.length);

    SimpleTVTerms terms = new SimpleTVTerms(offsets, positions, payloads);
    fields.put(fieldName, terms);

    BytesRefBuilder term = new BytesRefBuilder();
    for (int j = 0; j < termCount; j++) {
      readLine();
      assert StringHelper.startsWith(scratch.get(), TERMTEXT);
      int termLength = scratch.length() - TERMTEXT.length;
      term.grow(termLength);
      term.setLength(termLength);
      System.arraycopy(scratch.bytes(), TERMTEXT.length, term.bytes(), 0, termLength);

      SimpleTVPostings postings = new SimpleTVPostings();
      terms.terms.put(term.toBytesRef(), postings);

      readLine();
      assert StringHelper.startsWith(scratch.get(), TERMFREQ);
      postings.freq = parseIntAt(TERMFREQ.length);

      if (positions || offsets) {
        if (positions) {
          postings.positions = new int[postings.freq];
          if (payloads) {
            postings.payloads = new BytesRef[postings.freq];
          }
        }

        if (offsets) {
          postings.startOffsets = new int[postings.freq];
          postings.endOffsets = new int[postings.freq];
        }

        for (int k = 0; k < postings.freq; k++) {
          if (positions) {
            readLine();
            assert StringHelper.startsWith(scratch.get(), POSITION);
            postings.positions[k] = parseIntAt(POSITION.length);
            if (payloads) {
              readLine();
              assert StringHelper.startsWith(scratch.get(), PAYLOAD);
              if (scratch.length() - PAYLOAD.length == 0) {
                postings.payloads[k] = null;
              } else {
                byte[] payloadBytes = new byte[scratch.length() - PAYLOAD.length];
                System.arraycopy(
                    scratch.bytes(), PAYLOAD.length, payloadBytes, 0, payloadBytes.length);
                postings.payloads[k] = new BytesRef(payloadBytes);
              }
            }
          }

          if (offsets) {
            readLine();
            assert StringHelper.startsWith(scratch.get(), STARTOFFSET);
            postings.startOffsets[k] = parseIntAt(STARTOFFSET.length);

            readLine();
            assert StringHelper.startsWith(scratch.get(), ENDOFFSET);
            postings.endOffsets[k] = parseIntAt(ENDOFFSET.length);
          }
        }
      }
    }
  }
  return new SimpleTVFields(fields);
}
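// Hedged consumer sketch (assumed names): reading the Fields that get(doc)
// returns through the standard term-vector API. 'reader' is an assumed
// IndexReader whose codec uses this term-vectors reader; note that the
// getTermVectors accessor varies by Lucene version.
private static void exampleReadVectors(IndexReader reader, int docID) throws IOException {
  Fields vectors = reader.getTermVectors(docID); // may be null: no vectors for this doc
  if (vectors == null) {
    return;
  }
  for (String field : vectors) { // fields come back in sorted order (TreeMap above)
    TermsEnum te = vectors.terms(field).iterator();
    BytesRef term;
    while ((term = te.next()) != null) {
      System.out.println(field + ":" + term.utf8ToString() + " freq=" + te.totalTermFreq());
    }
  }
}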