Java IndexOutput.writeVInt 예제들, org.apache.lucene.store.IndexOutput.writeVInt Java 예제들

예제 #1

0

파일 보기

파일: Lucene50NormsConsumer.java 프로젝트: tylerjharden/KnowledgeSharingPlatform

  private void writeTable(
      Iterable<Number> values,
      FormatAndBits compression,
      int count,
      NormMap uniqueValues,
      int numOrds)
      throws IOException {
    data.writeVInt(PackedInts.VERSION_CURRENT);
    data.writeVInt(compression.format.getId());
    data.writeVInt(compression.bitsPerValue);

    data.writeVInt(numOrds);
    for (int i = 0; i < numOrds; i++) {
      data.writeByte(uniqueValues.values[i]);
    }

    final PackedInts.Writer writer =
        PackedInts.getWriterNoHeader(
            data,
            compression.format,
            count,
            compression.bitsPerValue,
            PackedInts.DEFAULT_BUFFER_SIZE);
    for (Number nv : values) {
      int ord = uniqueValues.ord(nv.byteValue());
      if (ord < numOrds) {
        writer.add(ord);
      } else {
        writer.add(numOrds); // collapses all ords >= numOrds into a single value
      }
    }
    writer.finish();
  }

예제 #2

0

파일 보기

파일: BlockTreeTermsWriter.java 프로젝트: shaie/lucene-solr

  @Override
  public void close() throws IOException {

    IOException ioe = null;
    try {

      final long dirStart = out.getFilePointer();
      final long indexDirStart = indexOut.getFilePointer();

      out.writeVInt(fields.size());

      for (FieldMetaData field : fields) {
        // System.out.println("  field " + field.fieldInfo.name + " " + field.numTerms + " terms");
        out.writeVInt(field.fieldInfo.number);
        out.writeVLong(field.numTerms);
        out.writeVInt(field.rootCode.length);
        out.writeBytes(field.rootCode.bytes, field.rootCode.offset, field.rootCode.length);
        if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
          out.writeVLong(field.sumTotalTermFreq);
        }
        out.writeVLong(field.sumDocFreq);
        out.writeVInt(field.docCount);
        indexOut.writeVLong(field.indexStartFP);
      }
      writeTrailer(out, dirStart);
      writeIndexTrailer(indexOut, indexDirStart);
    } catch (IOException ioe2) {
      ioe = ioe2;
    } finally {
      IOUtils.closeWhileHandlingException(ioe, out, indexOut, postingsWriter);
    }
  }

예제 #3

0

파일 보기

파일: TermVectorsWriter.java 프로젝트: praisondani/chemxseer

  private void writeDoc() throws IOException {
    if (isFieldOpen())
      throw new IllegalStateException("Field is still open while writing document");
    // System.out.println("Writing doc pointer: " + currentDocPointer);
    // write document index record
    tvx.writeLong(currentDocPointer);

    // write document data record
    final int size = fields.size();

    // write the number of fields
    tvd.writeVInt(size);

    // write field numbers
    for (int i = 0; i < size; i++) {
      TVField field = (TVField) fields.elementAt(i);
      tvd.writeVInt(field.number);
    }

    // write field pointers
    long lastFieldPointer = 0;
    for (int i = 0; i < size; i++) {
      TVField field = (TVField) fields.elementAt(i);
      tvd.writeVLong(field.tvfPointer - lastFieldPointer);
      lastFieldPointer = field.tvfPointer;
    }
    // System.out.println("After writing doc pointer: " + tvx.getFilePointer());
  }

예제 #4

0

파일 보기

파일: VariableGapTermsIndexWriter.java 프로젝트: sudarshang/lucene-solr

  public void close() throws IOException {
    try {
      final long dirStart = out.getFilePointer();
      final int fieldCount = fields.size();

      int nonNullFieldCount = 0;
      for (int i = 0; i < fieldCount; i++) {
        FSTFieldWriter field = fields.get(i);
        if (field.fst != null) {
          nonNullFieldCount++;
        }
      }

      out.writeVInt(nonNullFieldCount);
      for (int i = 0; i < fieldCount; i++) {
        FSTFieldWriter field = fields.get(i);
        if (field.fst != null) {
          out.writeVInt(field.fieldInfo.number);
          out.writeVLong(field.indexStart);
        }
      }
      writeTrailer(dirStart);
    } finally {
      out.close();
    }
  }

예제 #5

0

파일 보기

파일: Lucene50NormsConsumer.java 프로젝트: tylerjharden/KnowledgeSharingPlatform

  private void addDeltaCompressed(Iterable<Number> values, int count) throws IOException {
    meta.writeVInt(count);
    meta.writeByte(DELTA_COMPRESSED); // delta-compressed
    meta.writeLong(data.getFilePointer());
    data.writeVInt(PackedInts.VERSION_CURRENT);
    data.writeVInt(BLOCK_SIZE);

    final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
    for (Number nv : values) {
      writer.add(nv.longValue());
    }
    writer.finish();
  }

예제 #6

0

파일 보기

파일: Lucene46FieldInfosFormat.java 프로젝트: PATRIC3/p3_solr

  @Override
  public void write(
      Directory directory,
      SegmentInfo segmentInfo,
      String segmentSuffix,
      FieldInfos infos,
      IOContext context)
      throws IOException {
    final String fileName =
        IndexFileNames.segmentFileName(
            segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
    try (IndexOutput output = directory.createOutput(fileName, context)) {
      CodecUtil.writeHeader(
          output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
      output.writeVInt(infos.size());
      for (FieldInfo fi : infos) {
        IndexOptions indexOptions = fi.getIndexOptions();
        byte bits = 0x0;
        if (fi.hasVectors()) bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
        if (fi.omitsNorms()) bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
        if (fi.hasPayloads()) bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
        if (fi.getIndexOptions() != IndexOptions.NONE) {
          bits |= Lucene46FieldInfosFormat.IS_INDEXED;
          assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0
              || !fi.hasPayloads();
          if (indexOptions == IndexOptions.DOCS) {
            bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
          } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
            bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
          } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
            bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
          }
        }
        output.writeString(fi.name);
        output.writeVInt(fi.number);
        output.writeByte(bits);

        // pack the DV types in one byte
        final byte dv = docValuesByte(fi.getDocValuesType());
        final byte nrm = docValuesByte(fi.hasNorms() ? DocValuesType.NUMERIC : DocValuesType.NONE);
        assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
        byte val = (byte) (0xff & ((nrm << 4) | dv));
        output.writeByte(val);
        output.writeLong(fi.getDocValuesGen());
        output.writeStringStringMap(fi.attributes());
      }
      CodecUtil.writeFooter(output);
    }
  }

예제 #7

0

파일 보기

파일: PulsingPostingsWriter.java 프로젝트: naryad/Solr4.0

 @Override
 public void start(IndexOutput termsOut) throws IOException {
   this.termsOut = termsOut;
   CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
   termsOut.writeVInt(pending.length); // encode maxPositions in header
   wrappedPostingsWriter.start(termsOut);
 }

예제 #8

0

파일 보기

파일: Lucene50NormsConsumer.java 프로젝트: tylerjharden/KnowledgeSharingPlatform

  // encodes values as sparse array: keys[] and values[]
  // access is log(N) where N = keys.length (slow!)
  // so this is only appropriate as an exception table for patched, or when common value is 0 (wont
  // be accessed by searching)
  private void addIndirect(
      FieldInfo field,
      final Iterable<Number> values,
      int count,
      final NormMap uniqueValues,
      final int minOrd)
      throws IOException {
    int commonCount = uniqueValues.freqs[minOrd];

    meta.writeVInt(count - commonCount);
    meta.writeByte(INDIRECT);
    meta.writeLong(data.getFilePointer());

    // write docs with value
    writeDocsWithValue(values, uniqueValues, minOrd);

    // write actual values
    writeNormsField(
        field,
        new Iterable<Number>() {
          @Override
          public Iterator<Number> iterator() {
            return new FilterIterator<Number, Number>(values.iterator()) {
              @Override
              protected boolean predicateFunction(Number value) {
                return uniqueValues.ord(value.byteValue()) > minOrd;
              }
            };
          }
        },
        1);
  }

예제 #9

0

파일 보기

파일: Lucene50NormsConsumer.java 프로젝트: tylerjharden/KnowledgeSharingPlatform

 private void addUncompressed(Iterable<Number> values, int count) throws IOException {
   meta.writeVInt(count);
   meta.writeByte(UNCOMPRESSED); // uncompressed byte[]
   meta.writeLong(data.getFilePointer());
   for (Number nv : values) {
     data.writeByte(nv.byteValue());
   }
 }

예제 #10

0

파일 보기

파일: PulsingPostingsWriter.java 프로젝트: naryad/Solr4.0

  @Override
  public void flushTermsBlock(int start, int count) throws IOException {
    if (DEBUG)
      System.out.println(
          "PW: flushTermsBlock start="
              + start
              + " count="
              + count
              + " pendingTerms.size()="
              + pendingTerms.size());
    int wrappedCount = 0;
    assert buffer.getFilePointer() == 0;
    assert start >= count;

    final int limit = pendingTerms.size() - start + count;

    for (int idx = pendingTerms.size() - start; idx < limit; idx++) {
      final PendingTerm term = pendingTerms.get(idx);
      if (term == null) {
        wrappedCount++;
      } else {
        buffer.writeVInt(term.bytes.length);
        buffer.writeBytes(term.bytes, 0, term.bytes.length);
      }
    }

    termsOut.writeVInt((int) buffer.getFilePointer());
    buffer.writeTo(termsOut);
    buffer.reset();

    // TDOO: this could be somewhat costly since
    // pendingTerms.size() could be biggish?
    int futureWrappedCount = 0;
    final int limit2 = pendingTerms.size();
    for (int idx = limit; idx < limit2; idx++) {
      if (pendingTerms.get(idx) == null) {
        futureWrappedCount++;
      }
    }

    // Remove the terms we just wrote:
    pendingTerms.subList(pendingTerms.size() - start, limit).clear();

    if (DEBUG)
      System.out.println(
          "PW:   len="
              + buffer.getFilePointer()
              + " fp="
              + termsOut.getFilePointer()
              + " futureWrappedCount="
              + futureWrappedCount
              + " wrappedCount="
              + wrappedCount);
    // TODO: can we avoid calling this if all terms
    // were inlined...?  Eg for a "primary key" field, the
    // wrapped codec is never invoked...
    wrappedPostingsWriter.flushTermsBlock(futureWrappedCount + wrappedCount, wrappedCount);
  }

예제 #11

0

파일 보기

파일: Lucene50NormsConsumer.java 프로젝트: tylerjharden/KnowledgeSharingPlatform

  private void addTableCompressed(
      Iterable<Number> values, FormatAndBits compression, int count, NormMap uniqueValues)
      throws IOException {
    meta.writeVInt(count);
    meta.writeByte(TABLE_COMPRESSED); // table-compressed
    meta.writeLong(data.getFilePointer());

    writeTable(values, compression, count, uniqueValues, uniqueValues.size);
  }

예제 #12

0

파일 보기

파일: Lucene50NormsConsumer.java 프로젝트: tylerjharden/KnowledgeSharingPlatform

  private void writeDocsWithValue(final Iterable<Number> values, NormMap uniqueValues, int minOrd)
      throws IOException {
    data.writeLong(uniqueValues.values[minOrd]);
    data.writeVInt(PackedInts.VERSION_CURRENT);
    data.writeVInt(BLOCK_SIZE);

    // write docs with value
    final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
    int doc = 0;
    for (Number n : values) {
      int ord = uniqueValues.ord(n.byteValue());
      if (ord > minOrd) {
        writer.add(doc);
      }
      doc++;
    }
    writer.finish();
  }

예제 #13

0

파일 보기

파일: Lucene50NormsConsumer.java 프로젝트: tylerjharden/KnowledgeSharingPlatform

  // encodes common values in a table, and the rest of the values as exceptions using INDIRECT.
  // the exceptions should not be accessed very often, since the values are uncommon
  private void addPatchedTable(
      FieldInfo field,
      final Iterable<Number> values,
      final int numCommonValues,
      int commonValuesCount,
      int count,
      final NormMap uniqueValues)
      throws IOException {
    meta.writeVInt(count);
    meta.writeByte(PATCHED_TABLE);
    meta.writeLong(data.getFilePointer());

    assert numCommonValues == 3 || numCommonValues == 15;
    FormatAndBits compression = fastestFormatAndBits(numCommonValues);

    writeTable(values, compression, count, uniqueValues, numCommonValues);

    meta.writeVInt(field.number);
    addIndirect(field, values, count - commonValuesCount, uniqueValues, numCommonValues);
  }

예제 #14

0

파일 보기

파일: TermVectorsWriter.java 프로젝트: praisondani/chemxseer

  private void writeField() throws IOException {
    // remember where this field is written
    currentField.tvfPointer = tvf.getFilePointer();
    // System.out.println("Field Pointer: " + currentField.tvfPointer);

    final int size = terms.size();
    tvf.writeVInt(size);

    boolean storePositions = currentField.storePositions;
    boolean storeOffsets = currentField.storeOffsets;
    byte bits = 0x0;
    if (storePositions) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
    if (storeOffsets) bits |= STORE_OFFSET_WITH_TERMVECTOR;
    tvf.writeByte(bits);

    String lastTermText = "";
    for (int i = 0; i < size; i++) {
      TVTerm term = (TVTerm) terms.elementAt(i);
      int start = StringHelper.stringDifference(lastTermText, term.termText);
      int length = term.termText.length() - start;
      tvf.writeVInt(start); // write shared prefix length
      tvf.writeVInt(length); // write delta length
      tvf.writeChars(term.termText, start, length); // write delta chars
      tvf.writeVInt(term.freq);
      lastTermText = term.termText;

      if (storePositions) {
        if (term.positions == null)
          throw new IllegalStateException("Trying to write positions that are null!");

        // use delta encoding for positions
        int position = 0;
        for (int j = 0; j < term.freq; j++) {
          tvf.writeVInt(term.positions[j] - position);
          position = term.positions[j];
        }
      }

      if (storeOffsets) {
        if (term.offsets == null)
          throw new IllegalStateException("Trying to write offsets that are null!");

        // use delta encoding for offsets
        int position = 0;
        for (int j = 0; j < term.freq; j++) {
          tvf.writeVInt(term.offsets[j].getStartOffset() - position);
          tvf.writeVInt(
              term.offsets[j].getEndOffset()
                  - term.offsets[j].getStartOffset()); // Save the diff between the two.
          position = term.offsets[j].getEndOffset();
        }
      }
    }
  }

예제 #15

0

파일 보기

파일: Lucene60PointsWriter.java 프로젝트: apache/lucene-solr

  @Override
  public void finish() throws IOException {
    if (finished) {
      throw new IllegalStateException("already finished");
    }
    finished = true;
    CodecUtil.writeFooter(dataOut);

    String indexFileName =
        IndexFileNames.segmentFileName(
            writeState.segmentInfo.name,
            writeState.segmentSuffix,
            Lucene60PointsFormat.INDEX_EXTENSION);
    // Write index file
    try (IndexOutput indexOut =
        writeState.directory.createOutput(indexFileName, writeState.context)) {
      CodecUtil.writeIndexHeader(
          indexOut,
          Lucene60PointsFormat.META_CODEC_NAME,
          Lucene60PointsFormat.INDEX_VERSION_CURRENT,
          writeState.segmentInfo.getId(),
          writeState.segmentSuffix);
      int count = indexFPs.size();
      indexOut.writeVInt(count);
      for (Map.Entry<String, Long> ent : indexFPs.entrySet()) {
        FieldInfo fieldInfo = writeState.fieldInfos.fieldInfo(ent.getKey());
        if (fieldInfo == null) {
          throw new IllegalStateException(
              "wrote field=\"" + ent.getKey() + "\" but that field doesn't exist in FieldInfos");
        }
        indexOut.writeVInt(fieldInfo.number);
        indexOut.writeVLong(ent.getValue());
      }
      CodecUtil.writeFooter(indexOut);
    }
  }

예제 #16

0

파일 보기

파일: Lucene50NormsConsumer.java 프로젝트: tylerjharden/KnowledgeSharingPlatform

  // encodes only uncommon values in a sparse bitset
  // access is constant time, and the common case is predictable
  // exceptions nest either to CONST (if there are only 2 values), or INDIRECT (if there are > 2
  // values)
  private void addPatchedBitset(
      FieldInfo field, final Iterable<Number> values, int count, NormMap uniqueValues)
      throws IOException {
    int commonCount = uniqueValues.freqs[0];

    meta.writeVInt(count - commonCount);
    meta.writeByte(PATCHED_BITSET);
    meta.writeLong(data.getFilePointer());

    // write docs with value
    writeDocsWithValue(values, uniqueValues, 0);

    // write exceptions: only two cases make sense
    // bpv = 1 (folded into sparse bitset already)
    // bpv > 1 (add indirect exception table)
    meta.writeVInt(field.number);
    if (uniqueValues.size == 2) {
      // special case: implicit in bitset
      addConstant(uniqueValues.values[1]);
    } else {
      // exception table
      addIndirect(field, values, count, uniqueValues, 0);
    }
  }

예제 #17

0

파일 보기

파일: Lucene50NormsConsumer.java 프로젝트: tylerjharden/KnowledgeSharingPlatform

 @Override
 public void close() throws IOException {
   boolean success = false;
   try {
     if (meta != null) {
       meta.writeVInt(-1); // write EOF marker
       CodecUtil.writeFooter(meta); // write checksum
     }
     if (data != null) {
       CodecUtil.writeFooter(data); // write checksum
     }
     success = true;
   } finally {
     if (success) {
       IOUtils.close(data, meta);
     } else {
       IOUtils.closeWhileHandlingException(data, meta);
     }
     meta = data = null;
   }
 }

예제 #18

0

파일 보기

파일: TermVectorsTermsWriterPerField.java 프로젝트: erikhatcher/lucene-solr-svn

  /**
   * Called once per field per document if term vectors are enabled, to write the vectors to
   * RAMOutputStream, which is then quickly flushed to the real term vectors files in the Directory.
   */
  @Override
  void finish() throws IOException {

    assert docState.testPoint("TermVectorsTermsWriterPerField.finish start");

    final int numPostings = termsHashPerField.bytesHash.size();

    final BytesRef flushTerm = perThread.flushTerm;

    assert numPostings >= 0;

    if (!doVectors || numPostings == 0) return;

    if (numPostings > maxNumPostings) maxNumPostings = numPostings;

    final IndexOutput tvf = perThread.doc.perDocTvf;

    // This is called once, after inverting all occurrences
    // of a given field in the doc.  At this point we flush
    // our hash into the DocWriter.

    assert fieldInfo.storeTermVector;
    assert perThread.vectorFieldsInOrder(fieldInfo);

    perThread.doc.addField(termsHashPerField.fieldInfo.number);
    TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;

    // TODO: we may want to make this sort in same order
    // as Codec's terms dict?
    final int[] termIDs =
        termsHashPerField.sortPostings(BytesRef.getUTF8SortedAsUnicodeComparator());

    tvf.writeVInt(numPostings);
    byte bits = 0x0;
    if (doVectorPositions) bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
    if (doVectorOffsets) bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
    tvf.writeByte(bits);

    int lastLen = 0;
    byte[] lastBytes = null;
    int lastStart = 0;

    final ByteSliceReader reader = perThread.vectorSliceReader;
    final ByteBlockPool termBytePool = perThread.termsHashPerThread.termBytePool;

    for (int j = 0; j < numPostings; j++) {
      final int termID = termIDs[j];
      final int freq = postings.freqs[termID];

      // Get BytesRef
      termBytePool.setBytesRef(flushTerm, postings.textStarts[termID]);

      // Compute common byte prefix between last term and
      // this term
      int prefix = 0;
      if (j > 0) {
        while (prefix < lastLen && prefix < flushTerm.length) {
          if (lastBytes[lastStart + prefix] != flushTerm.bytes[flushTerm.offset + prefix]) {
            break;
          }
          prefix++;
        }
      }

      lastLen = flushTerm.length;
      lastBytes = flushTerm.bytes;
      lastStart = flushTerm.offset;

      final int suffix = flushTerm.length - prefix;
      tvf.writeVInt(prefix);
      tvf.writeVInt(suffix);
      tvf.writeBytes(flushTerm.bytes, lastStart + prefix, suffix);
      tvf.writeVInt(freq);

      if (doVectorPositions) {
        termsHashPerField.initReader(reader, termID, 0);
        reader.writeTo(tvf);
      }

      if (doVectorOffsets) {
        termsHashPerField.initReader(reader, termID, 1);
        reader.writeTo(tvf);
      }
    }

    termsHashPerField.reset();

    // NOTE: we clear, per-field, at the thread level,
    // because term vectors fully write themselves on each
    // field; this saves RAM (eg if large doc has two large
    // fields w/ term vectors on) because we recycle/reuse
    // all RAM after each field:
    perThread.termsHashPerThread.reset(false);
  }

예제 #19

0

파일 보기

파일: Lucene50NormsConsumer.java 프로젝트: tylerjharden/KnowledgeSharingPlatform

  private void writeNormsField(FieldInfo field, Iterable<Number> values, int level)
      throws IOException {
    assert level <= 1; // we only "recurse" once in the indirect case
    meta.writeVInt(field.number);
    NormMap uniqueValues = new NormMap();
    int count = 0;

    for (Number nv : values) {
      if (nv == null) {
        throw new IllegalStateException(
            "illegal norms data for field " + field.name + ", got null for value: " + count);
      }
      final long v = nv.longValue();

      if (uniqueValues != null) {
        if (v >= Byte.MIN_VALUE && v <= Byte.MAX_VALUE) {
          if (uniqueValues.add((byte) v)) {
            if (uniqueValues.size > 256) {
              uniqueValues = null;
            }
          }
        } else {
          // anything outside an 8 bit float comes from a custom scorer, which is an extreme edge
          // case
          uniqueValues = null;
        }
      }
      count++;
    }

    if (uniqueValues == null) {
      addDeltaCompressed(values, count);
    } else if (uniqueValues.size == 1) {
      // 0 bpv
      addConstant(uniqueValues.values[0]);
    } else {
      // small number of unique values: this is the typical case
      uniqueValues.optimizeOrdinals();

      int numCommonValues = -1;
      int commonValuesCount = 0;
      if (level == 0 && count > 256) {
        float threshold_count = count * INDIRECT_THRESHOLD;
        if (uniqueValues.freqs[0] > threshold_count) {
          numCommonValues = 1;
        } else if ((commonValuesCount = sum(uniqueValues.freqs, 0, 3)) > threshold_count
            && uniqueValues.size > 4) {
          numCommonValues = 3;
        } else if ((commonValuesCount = sum(uniqueValues.freqs, 0, 15)) > threshold_count
            && uniqueValues.size > 16) {
          numCommonValues = 15;
        }
      }

      if (numCommonValues == -1) {
        // no pattern in values, just find the most efficient way to pack the values
        FormatAndBits compression = fastestFormatAndBits(uniqueValues.size - 1);
        if (compression.bitsPerValue == 8) {
          addUncompressed(values, count);
        } else {
          addTableCompressed(values, compression, count, uniqueValues);
        }

      } else if (numCommonValues == 1) {
        byte commonValue = uniqueValues.values[0];
        if (commonValue == 0) {
          // if the common value is missing, don't waste RAM on a bitset, since we won't be
          // searching those docs
          addIndirect(field, values, count, uniqueValues, 0);
        } else {
          // otherwise, write a sparse bitset, where 1 indicates 'uncommon value'.
          addPatchedBitset(field, values, count, uniqueValues);
        }
      } else {
        addPatchedTable(field, values, numCommonValues, commonValuesCount, count, uniqueValues);
      }
    }
  }

예제 #20

0

파일 보기

파일: Lucene40SkipListWriter.java 프로젝트: zuoyebushiwo/lucene-solr-lucene_solr_4_10_4

  @Override
  protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException {
    // To efficiently store payloads/offsets in the posting lists we do not store the length of
    // every payload/offset. Instead we omit the length if the previous lengths were the same
    //
    // However, in order to support skipping, the length at every skip point must be known.
    // So we use the same length encoding that we use for the posting lists for the skip data as
    // well:
    // Case 1: current field does not store payloads/offsets
    //           SkipDatum                 --> DocSkip, FreqSkip, ProxSkip
    //           DocSkip,FreqSkip,ProxSkip --> VInt
    //           DocSkip records the document number before every SkipInterval th  document in
    // TermFreqs.
    //           Document numbers are represented as differences from the previous value in the
    // sequence.
    // Case 2: current field stores payloads/offsets
    //           SkipDatum                 --> DocSkip,
    // PayloadLength?,OffsetLength?,FreqSkip,ProxSkip
    //           DocSkip,FreqSkip,ProxSkip --> VInt
    //           PayloadLength,OffsetLength--> VInt
    //         In this case DocSkip/2 is the difference between
    //         the current and the previous value. If DocSkip
    //         is odd, then a PayloadLength encoded as VInt follows,
    //         if DocSkip is even, then it is assumed that the
    //         current payload/offset lengths equals the lengths at the previous
    //         skip point
    int delta = curDoc - lastSkipDoc[level];

    if (curStorePayloads || curStoreOffsets) {
      assert curStorePayloads || curPayloadLength == lastSkipPayloadLength[level];
      assert curStoreOffsets || curOffsetLength == lastSkipOffsetLength[level];

      if (curPayloadLength == lastSkipPayloadLength[level]
          && curOffsetLength == lastSkipOffsetLength[level]) {
        // the current payload/offset lengths equals the lengths at the previous skip point,
        // so we don't store the lengths again
        skipBuffer.writeVInt(delta << 1);
      } else {
        // the payload and/or offset length is different from the previous one. We shift the
        // DocSkip,
        // set the lowest bit and store the current payload and/or offset lengths as VInts.
        skipBuffer.writeVInt(delta << 1 | 1);

        if (curStorePayloads) {
          skipBuffer.writeVInt(curPayloadLength);
          lastSkipPayloadLength[level] = curPayloadLength;
        }
        if (curStoreOffsets) {
          skipBuffer.writeVInt(curOffsetLength);
          lastSkipOffsetLength[level] = curOffsetLength;
        }
      }
    } else {
      // current field does not store payloads or offsets
      skipBuffer.writeVInt(delta);
    }

    skipBuffer.writeVLong(curFreqPointer - lastSkipFreqPointer[level]);
    skipBuffer.writeVLong(curProxPointer - lastSkipProxPointer[level]);

    lastSkipDoc[level] = curDoc;

    lastSkipFreqPointer[level] = curFreqPointer;
    lastSkipProxPointer[level] = curProxPointer;
  }

예제 #21

0

파일 보기

파일: Lucene50NormsConsumer.java 프로젝트: tylerjharden/KnowledgeSharingPlatform

 private void addConstant(byte constant) throws IOException {
   meta.writeVInt(0);
   meta.writeByte(CONST_COMPRESSED);
   meta.writeLong(constant);
 }

예제 #22

0

파일 보기

파일: SegmentInfos.java 프로젝트: fullstorydev/lucene-solr

  private void write(Directory directory) throws IOException {

    long nextGeneration = getNextPendingGeneration();
    String segmentFileName =
        IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, "", nextGeneration);

    // Always advance the generation on write:
    generation = nextGeneration;

    IndexOutput segnOutput = null;
    boolean success = false;

    try {
      segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
      CodecUtil.writeIndexHeader(
          segnOutput,
          "segments",
          VERSION_CURRENT,
          StringHelper.randomId(),
          Long.toString(nextGeneration, Character.MAX_RADIX));
      segnOutput.writeVInt(Version.LATEST.major);
      segnOutput.writeVInt(Version.LATEST.minor);
      segnOutput.writeVInt(Version.LATEST.bugfix);

      segnOutput.writeLong(version);
      segnOutput.writeInt(counter); // write counter
      segnOutput.writeInt(size());

      if (size() > 0) {

        Version minSegmentVersion = null;

        // We do a separate loop up front so we can write the minSegmentVersion before
        // any SegmentInfo; this makes it cleaner to throw IndexFormatTooOldExc at read time:
        for (SegmentCommitInfo siPerCommit : this) {
          Version segmentVersion = siPerCommit.info.getVersion();
          if (minSegmentVersion == null || segmentVersion.onOrAfter(minSegmentVersion) == false) {
            minSegmentVersion = segmentVersion;
          }
        }

        segnOutput.writeVInt(minSegmentVersion.major);
        segnOutput.writeVInt(minSegmentVersion.minor);
        segnOutput.writeVInt(minSegmentVersion.bugfix);
      }

      // write infos
      for (SegmentCommitInfo siPerCommit : this) {
        SegmentInfo si = siPerCommit.info;
        segnOutput.writeString(si.name);
        byte segmentID[] = si.getId();
        // TODO: remove this in lucene 6, we don't need to include 4.x segments in commits anymore
        if (segmentID == null) {
          segnOutput.writeByte((byte) 0);
        } else {
          if (segmentID.length != StringHelper.ID_LENGTH) {
            throw new IllegalStateException(
                "cannot write segment: invalid id segment="
                    + si.name
                    + "id="
                    + StringHelper.idToString(segmentID));
          }
          segnOutput.writeByte((byte) 1);
          segnOutput.writeBytes(segmentID, segmentID.length);
        }
        segnOutput.writeString(si.getCodec().getName());
        segnOutput.writeLong(siPerCommit.getDelGen());
        int delCount = siPerCommit.getDelCount();
        if (delCount < 0 || delCount > si.maxDoc()) {
          throw new IllegalStateException(
              "cannot write segment: invalid maxDoc segment="
                  + si.name
                  + " maxDoc="
                  + si.maxDoc()
                  + " delCount="
                  + delCount);
        }
        segnOutput.writeInt(delCount);
        segnOutput.writeLong(siPerCommit.getFieldInfosGen());
        segnOutput.writeLong(siPerCommit.getDocValuesGen());
        segnOutput.writeSetOfStrings(siPerCommit.getFieldInfosFiles());
        final Map<Integer, Set<String>> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles();
        segnOutput.writeInt(dvUpdatesFiles.size());
        for (Entry<Integer, Set<String>> e : dvUpdatesFiles.entrySet()) {
          segnOutput.writeInt(e.getKey());
          segnOutput.writeSetOfStrings(e.getValue());
        }
      }
      segnOutput.writeMapOfStrings(userData);
      CodecUtil.writeFooter(segnOutput);
      segnOutput.close();
      directory.sync(Collections.singleton(segmentFileName));
      success = true;
    } finally {
      if (success) {
        pendingCommit = true;
      } else {
        // We hit an exception above; try to close the file
        // but suppress any exception:
        IOUtils.closeWhileHandlingException(segnOutput);
        // Try not to leave a truncated segments_N file in
        // the index:
        IOUtils.deleteFilesIgnoringExceptions(directory, segmentFileName);
      }
    }
  }