Exemplo n.º 1
0
  /**
   * Writes the per-field directory and the trailers, then closes the underlying outputs.
   *
   * <p>{@code out} receives the number of fields followed by, for each field: its field number,
   * term count, root code bytes, {@code sumTotalTermFreq} (skipped for {@code DOCS_ONLY} fields),
   * {@code sumDocFreq} and doc count. {@code indexOut} receives each field's index start file
   * pointer. Whether or not writing succeeds, {@code out}, {@code indexOut} and {@code
   * postingsWriter} are passed to {@code IOUtils.closeWhileHandlingException} together with any
   * {@link IOException} caught while writing.
   *
   * @throws IOException presumably rethrown by the close helper when writing or closing failed
   *     (its exact contract is not visible here; confirm against IOUtils)
   */
  @Override
  public void close() throws IOException {
    IOException pending = null;
    try {
      // Capture both directory start positions before anything is appended; they are handed to
      // the trailer writers below.
      final long termsDirStart = out.getFilePointer();
      final long indexDirStart = indexOut.getFilePointer();

      out.writeVInt(fields.size());
      for (FieldMetaData fieldMeta : fields) {
        out.writeVInt(fieldMeta.fieldInfo.number);
        out.writeVLong(fieldMeta.numTerms);
        out.writeVInt(fieldMeta.rootCode.length);
        out.writeBytes(
            fieldMeta.rootCode.bytes, fieldMeta.rootCode.offset, fieldMeta.rootCode.length);
        final boolean docsOnly = fieldMeta.fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY;
        if (!docsOnly) {
          out.writeVLong(fieldMeta.sumTotalTermFreq);
        }
        out.writeVLong(fieldMeta.sumDocFreq);
        out.writeVInt(fieldMeta.docCount);
        indexOut.writeVLong(fieldMeta.indexStartFP);
      }

      writeTrailer(out, termsDirStart);
      writeIndexTrailer(indexOut, indexDirStart);
    } catch (IOException e) {
      // Remember the failure so the close helper can take it into account.
      pending = e;
    } finally {
      IOUtils.closeWhileHandlingException(pending, out, indexOut, postingsWriter);
    }
  }
  /**
   * Writes all files of the segment into a single text-based compound data file.
   *
   * <p>The file names are sorted, then each file is copied into the data file preceded by a
   * header line carrying its name. After the data, a table is written listing the number of files
   * and, per file, its name plus the start and end offsets of its bytes. The last line records
   * the position at which the table starts, formatted with {@code OFFSETPATTERN}.
   *
   * @param dir directory the segment files are read from and the compound file is created in
   * @param si segment whose files are combined
   * @param context IO context used to create the compound output
   * @throws IOException if reading a source file or writing the compound file fails
   */
  @Override
  public void write(Directory dir, SegmentInfo si, IOContext context) throws IOException {
    final String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION);

    final int numFiles = si.files().size();
    final String[] sortedNames = si.files().toArray(new String[numFiles]);
    Arrays.sort(sortedNames);

    final long[] startOffsets = new long[numFiles];
    final long[] endOffsets = new long[numFiles];
    final BytesRefBuilder scratch = new BytesRefBuilder();

    try (IndexOutput out = dir.createOutput(dataFile, context)) {
      // Data section: header line, then the raw bytes of each file.
      for (int fileIdx = 0; fileIdx < sortedNames.length; fileIdx++) {
        final String name = sortedNames[fileIdx];

        SimpleTextUtil.write(out, HEADER);
        SimpleTextUtil.write(out, name, scratch);
        SimpleTextUtil.writeNewline(out);

        startOffsets[fileIdx] = out.getFilePointer();
        try (IndexInput in = dir.openInput(name, IOContext.READONCE)) {
          out.copyBytes(in, in.length());
        }
        endOffsets[fileIdx] = out.getFilePointer();
      }

      // Everything from here on is the table of contents.
      final long tocPos = out.getFilePointer();

      SimpleTextUtil.write(out, TABLE);
      SimpleTextUtil.write(out, String.valueOf(numFiles), scratch);
      SimpleTextUtil.writeNewline(out);

      int entry = 0;
      while (entry < sortedNames.length) {
        SimpleTextUtil.write(out, TABLENAME);
        SimpleTextUtil.write(out, sortedNames[entry], scratch);
        SimpleTextUtil.writeNewline(out);

        SimpleTextUtil.write(out, TABLESTART);
        SimpleTextUtil.write(out, String.valueOf(startOffsets[entry]), scratch);
        SimpleTextUtil.writeNewline(out);

        SimpleTextUtil.write(out, TABLEEND);
        SimpleTextUtil.write(out, String.valueOf(endOffsets[entry]), scratch);
        SimpleTextUtil.writeNewline(out);

        entry++;
      }

      // The table position is formatted with root-locale symbols so the output does not depend
      // on the default locale.
      final DecimalFormatSymbols rootSymbols = DecimalFormatSymbols.getInstance(Locale.ROOT);
      final DecimalFormat offsetFormat = new DecimalFormat(OFFSETPATTERN, rootSymbols);
      SimpleTextUtil.write(out, TABLEPOS);
      SimpleTextUtil.write(out, offsetFormat.format(tocPos), scratch);
      SimpleTextUtil.writeNewline(out);
    }
  }
 /**
  * Resets the skip state: after delegating to the superclass, the last-skip doc entries are
  * zeroed, the last payload and offset lengths are set to -1, and the last-skip file pointers are
  * set to the current positions of {@code freqOutput} and, when present, {@code proxOutput}.
  */
 @Override
 public void resetSkip() {
   super.resetSkip();

   Arrays.fill(lastSkipDoc, 0);

   // -1 because we don't have to write the first length in the skip list
   Arrays.fill(lastSkipPayloadLength, -1);
   Arrays.fill(lastSkipOffsetLength, -1);

   final long freqPosition = freqOutput.getFilePointer();
   Arrays.fill(lastSkipFreqPointer, freqPosition);

   if (proxOutput != null) {
     final long proxPosition = proxOutput.getFilePointer();
     Arrays.fill(lastSkipProxPointer, proxPosition);
   }
 }
 /**
  * Records the values for the current skip entry, together with the current file pointers of
  * {@code freqOutput} and (when present) {@code proxOutput}.
  *
  * @param doc document for the current skip entry
  * @param storePayloads whether payloads are stored; if false, {@code payloadLength} must be -1
  * @param payloadLength payload length for the current skip entry
  * @param storeOffsets whether offsets are stored; if false, {@code offsetLength} must be -1
  * @param offsetLength offset length for the current skip entry
  */
 public void setSkipData(
     int doc, boolean storePayloads, int payloadLength, boolean storeOffsets, int offsetLength) {
   assert storePayloads || payloadLength == -1;
   assert storeOffsets || offsetLength == -1;

   this.curDoc = doc;

   this.curStorePayloads = storePayloads;
   this.curStoreOffsets = storeOffsets;

   this.curPayloadLength = payloadLength;
   this.curOffsetLength = offsetLength;

   // Snapshot the output positions last, after the plain value fields are set.
   this.curFreqPointer = freqOutput.getFilePointer();
   if (proxOutput != null) {
     this.curProxPointer = proxOutput.getFilePointer();
   }
 }
  // Encodes values as a sparse array: keys[] and values[].
  // Access is log(N) where N = keys.length (slow!), so this is only appropriate as an exception
  // table for patched encodings, or when the common value is 0 (it won't be accessed by
  // searching).
  //
  // Writes the number of non-common values, the INDIRECT marker and the current data file
  // pointer to meta, then the docs-with-value structure, then a nested norms field containing
  // only the values whose ordinal is greater than minOrd.
  private void addIndirect(
      FieldInfo field,
      final Iterable<Number> values,
      int count,
      final NormMap uniqueValues,
      final int minOrd)
      throws IOException {
    final int exceptionCount = count - uniqueValues.freqs[minOrd];

    meta.writeVInt(exceptionCount);
    meta.writeByte(INDIRECT);
    meta.writeLong(data.getFilePointer());

    // write docs with value
    writeDocsWithValue(values, uniqueValues, minOrd);

    // View over the input that keeps only values whose ordinal lies above minOrd.
    final Iterable<Number> uncommonValues =
        new Iterable<Number>() {
          @Override
          public Iterator<Number> iterator() {
            return new FilterIterator<Number, Number>(values.iterator()) {
              @Override
              protected boolean predicateFunction(Number candidate) {
                final int ord = uniqueValues.ord(candidate.byteValue());
                return ord > minOrd;
              }
            };
          }
        };

    // write actual values
    writeNormsField(field, uncommonValues, 1);
  }
  /**
   * Writes the field directory and trailer, then closes {@code out}.
   *
   * <p>Only fields whose {@code fst} is non-null are listed: their count is written first,
   * followed by each such field's number and index start. {@code out} is closed even when
   * writing fails.
   *
   * @throws IOException if writing the directory or closing the output fails
   */
  public void close() throws IOException {
    try {
      final long directoryStart = out.getFilePointer();

      // First pass: count the fields that actually have an FST.
      int fieldsWithFst = 0;
      for (FSTFieldWriter writer : fields) {
        if (writer.fst != null) {
          fieldsWithFst++;
        }
      }
      out.writeVInt(fieldsWithFst);

      // Second pass: emit one directory entry per such field.
      for (FSTFieldWriter writer : fields) {
        if (writer.fst == null) {
          continue;
        }
        out.writeVInt(writer.fieldInfo.number);
        out.writeVLong(writer.indexStart);
      }

      writeTrailer(directoryStart);
    } finally {
      out.close();
    }
  }
 /**
  * Writes the values uncompressed, one byte per value.
  *
  * <p>{@code meta} receives the count, the {@code UNCOMPRESSED} marker and the data file pointer
  * at which the bytes start; {@code data} receives {@code byteValue()} of every value in
  * iteration order.
  *
  * @param values values to write
  * @param count number recorded in the metadata
  * @throws IOException if writing to either output fails
  */
 private void addUncompressed(Iterable<Number> values, int count) throws IOException {
   meta.writeVInt(count);
   meta.writeByte(UNCOMPRESSED);
   final long dataStart = data.getFilePointer();
   meta.writeLong(dataStart);

   for (Number value : values) {
     final byte asByte = value.byteValue();
     data.writeByte(asByte);
   }
 }
  /**
   * Tests that a single large write (2048 bytes, presumably more than the output's internal
   * buffer) advances the file pointer by exactly the number of bytes written.
   *
   * @throws IOException if creating, writing to or closing the output fails
   */
  public void testLargeWrites() throws IOException {
    IndexOutput os = dir.createOutput("testBufferStart.txt");

    // Everything after the output is opened runs inside try/finally, so the output is closed
    // even when the write itself throws and not only when the assertion fails.
    try {
      byte[] largeBuf = new byte[2048];
      for (int i = 0; i < largeBuf.length; i++) {
        largeBuf[i] = (byte) (Math.random() * 256);
      }

      long currentPos = os.getFilePointer();
      os.writeBytes(largeBuf, largeBuf.length);

      assertEquals(currentPos + largeBuf.length, os.getFilePointer());
    } finally {
      os.close();
    }
  }
Exemplo n.º 9
0
  /**
   * Flushes a block of pending terms.
   *
   * <p>The block covers {@code count} entries of {@code pendingTerms}, starting {@code start}
   * entries before the end of the list. Non-null entries have their bytes (length-prefixed)
   * buffered and then written to {@code termsOut}, preceded by the total buffered length. Null
   * entries are only counted; they presumably stand for terms handled by the wrapped postings
   * writer (confirm against where {@code pendingTerms} is filled). The flushed range is then
   * removed from {@code pendingTerms} and the wrapped writer is asked to flush its own block.
   *
   * @param start distance from the end of {@code pendingTerms} at which the block begins
   * @param count number of entries in the block; asserted to be at most {@code start}
   * @throws IOException if writing to {@code termsOut} or the wrapped writer fails
   */
  @Override
  public void flushTermsBlock(int start, int count) throws IOException {
    if (DEBUG)
      System.out.println(
          "PW: flushTermsBlock start="
              + start
              + " count="
              + count
              + " pendingTerms.size()="
              + pendingTerms.size());
    // Number of null entries inside the block being flushed.
    int wrappedCount = 0;
    assert buffer.getFilePointer() == 0;
    assert start >= count;

    // Exclusive end index of the block within pendingTerms.
    final int limit = pendingTerms.size() - start + count;

    for (int idx = pendingTerms.size() - start; idx < limit; idx++) {
      final PendingTerm term = pendingTerms.get(idx);
      if (term == null) {
        wrappedCount++;
      } else {
        buffer.writeVInt(term.bytes.length);
        buffer.writeBytes(term.bytes, 0, term.bytes.length);
      }
    }

    // Length prefix first, then the buffered bytes; the buffer is reset for the next block.
    termsOut.writeVInt((int) buffer.getFilePointer());
    buffer.writeTo(termsOut);
    buffer.reset();

    // TODO: this could be somewhat costly since
    // pendingTerms.size() could be biggish?
    // Count null entries that come after this block; this must happen before the block is
    // removed below, because removal shifts the indices.
    int futureWrappedCount = 0;
    final int limit2 = pendingTerms.size();
    for (int idx = limit; idx < limit2; idx++) {
      if (pendingTerms.get(idx) == null) {
        futureWrappedCount++;
      }
    }

    // Remove the terms we just wrote:
    pendingTerms.subList(pendingTerms.size() - start, limit).clear();

    if (DEBUG)
      System.out.println(
          "PW:   len="
              + buffer.getFilePointer()
              + " fp="
              + termsOut.getFilePointer()
              + " futureWrappedCount="
              + futureWrappedCount
              + " wrappedCount="
              + wrappedCount);
    // TODO: can we avoid calling this if all terms
    // were inlined...?  Eg for a "primary key" field, the
    // wrapped codec is never invoked...
    wrappedPostingsWriter.flushTermsBlock(futureWrappedCount + wrappedCount, wrappedCount);
  }
  /**
   * Writes the values table-compressed, using every entry of {@code uniqueValues} in the table.
   *
   * <p>{@code meta} receives the count, the {@code TABLE_COMPRESSED} marker and the data file
   * pointer at which the table starts; the table itself is produced by {@code writeTable}.
   *
   * @param values values to write
   * @param compression packed format and bits per value passed through to {@code writeTable}
   * @param count number recorded in the metadata and passed through to {@code writeTable}
   * @param uniqueValues map of the distinct values; its {@code size} is used as the table size
   * @throws IOException if writing fails
   */
  private void addTableCompressed(
      Iterable<Number> values, FormatAndBits compression, int count, NormMap uniqueValues)
      throws IOException {
    meta.writeVInt(count);
    meta.writeByte(TABLE_COMPRESSED);
    final long tableStart = data.getFilePointer();
    meta.writeLong(tableStart);

    final int tableSize = uniqueValues.size;
    writeTable(values, compression, count, uniqueValues, tableSize);
  }
  /**
   * Round-trip test: writes a random number of blocks with {@code ForUtil} into an in-memory
   * directory, then reads them back (randomly skipping some blocks) and checks that every decoded
   * block matches the original values and that the reader ends at the position where the writer
   * stopped.
   */
  public void testEncodeDecode() throws IOException {
    final int iterations = RandomInts.randomIntBetween(random(), 1, 1000);
    final float acceptableOverheadRatio = random().nextFloat();
    // Sized so that the last block still has MAX_DATA_SIZE ints available after its start.
    final int[] values = new int[(iterations - 1) * BLOCK_SIZE + ForUtil.MAX_DATA_SIZE];
    for (int i = 0; i < iterations; ++i) {
      final int bpv = random().nextInt(32);
      if (bpv == 0) {
        // bpv == 0: fill the whole block with one single random value.
        final int value = RandomInts.randomIntBetween(random(), 0, Integer.MAX_VALUE);
        for (int j = 0; j < BLOCK_SIZE; ++j) {
          values[i * BLOCK_SIZE + j] = value;
        }
      } else {
        // Otherwise each value is drawn from [0, PackedInts.maxValue(bpv)].
        for (int j = 0; j < BLOCK_SIZE; ++j) {
          values[i * BLOCK_SIZE + j] =
              RandomInts.randomIntBetween(random(), 0, (int) PackedInts.maxValue(bpv));
        }
      }
    }

    final Directory d = new RAMDirectory();
    // File pointer of the output after the last block was written.
    final long endPointer;

    {
      // encode
      IndexOutput out = d.createOutput("test.bin", IOContext.DEFAULT);
      final ForUtil forUtil = new ForUtil(acceptableOverheadRatio, out);

      for (int i = 0; i < iterations; ++i) {
        // The array handed to writeBlock starts at the current block and runs to the end of
        // values.
        forUtil.writeBlock(
            Arrays.copyOfRange(values, i * BLOCK_SIZE, values.length),
            new byte[MAX_ENCODED_SIZE],
            out);
      }
      endPointer = out.getFilePointer();
      out.close();
    }

    {
      // decode
      IndexInput in = d.openInput("test.bin", IOContext.READONCE);
      final ForUtil forUtil = new ForUtil(in);
      for (int i = 0; i < iterations; ++i) {
        // Randomly skip blocks instead of decoding them, to exercise skipBlock as well.
        if (random().nextBoolean()) {
          forUtil.skipBlock(in);
          continue;
        }
        final int[] restored = new int[MAX_DATA_SIZE];
        forUtil.readBlock(in, new byte[MAX_ENCODED_SIZE], restored);
        // Only the first BLOCK_SIZE entries of the decode buffer are compared.
        assertArrayEquals(
            Arrays.copyOfRange(values, i * BLOCK_SIZE, (i + 1) * BLOCK_SIZE),
            Arrays.copyOf(restored, BLOCK_SIZE));
      }
      // Skipped and decoded blocks together must consume exactly what was written.
      assertEquals(endPointer, in.getFilePointer());
      in.close();
    }
  }
Exemplo n.º 12
0
  /**
   * Writes the term vector data of the current field to {@code tvf}.
   *
   * <p>Layout written: number of terms, a flag byte saying whether positions and/or offsets are
   * stored, then per term the length of the prefix shared with the previous term, the length of
   * the remaining suffix, the suffix characters and the frequency. When positions are stored
   * they follow delta-encoded; when offsets are stored, each start offset is written as a delta
   * from the previous end offset, followed by the length (end minus start).
   *
   * @throws IOException if writing to {@code tvf} fails
   * @throws IllegalStateException if positions or offsets are to be stored but are null on a
   *     term
   */
  private void writeField() throws IOException {
    // remember where this field is written
    currentField.tvfPointer = tvf.getFilePointer();

    final int termCount = terms.size();
    tvf.writeVInt(termCount);

    final boolean withPositions = currentField.storePositions;
    final boolean withOffsets = currentField.storeOffsets;

    byte flags = 0x0;
    if (withPositions) {
      flags |= STORE_POSITIONS_WITH_TERMVECTOR;
    }
    if (withOffsets) {
      flags |= STORE_OFFSET_WITH_TERMVECTOR;
    }
    tvf.writeByte(flags);

    String previousText = "";
    for (int termIdx = 0; termIdx < termCount; termIdx++) {
      final TVTerm term = (TVTerm) terms.elementAt(termIdx);
      final String text = term.termText;

      final int sharedPrefix = StringHelper.stringDifference(previousText, text);
      final int suffixLength = text.length() - sharedPrefix;
      tvf.writeVInt(sharedPrefix); // write shared prefix length
      tvf.writeVInt(suffixLength); // write delta length
      tvf.writeChars(text, sharedPrefix, suffixLength); // write delta chars
      tvf.writeVInt(term.freq);
      previousText = text;

      if (withPositions) {
        if (term.positions == null) {
          throw new IllegalStateException("Trying to write positions that are null!");
        }

        // positions are stored as deltas from the previous position
        int previousPosition = 0;
        for (int j = 0; j < term.freq; j++) {
          final int currentPosition = term.positions[j];
          tvf.writeVInt(currentPosition - previousPosition);
          previousPosition = currentPosition;
        }
      }

      if (withOffsets) {
        if (term.offsets == null) {
          throw new IllegalStateException("Trying to write offsets that are null!");
        }

        // start offsets are stored as deltas from the previous end offset
        int previousEnd = 0;
        for (int j = 0; j < term.freq; j++) {
          final int startOffset = term.offsets[j].getStartOffset();
          final int endOffset = term.offsets[j].getEndOffset();
          tvf.writeVInt(startOffset - previousEnd);
          tvf.writeVInt(endOffset - startOffset); // Save the diff between the two.
          previousEnd = endOffset;
        }
      }
    }
  }
Exemplo n.º 13
0
 /**
  * Handles one file chunk sent during recovery: appends the chunk to the matching open index
  * output, updates the recovery statistics and, once the file is complete, verifies, closes and
  * syncs it. An empty response is sent after the chunk has been processed.
  *
  * @param request the chunk, with its file name, metadata, position and content
  * @param channel channel on which the empty acknowledgement is sent
  * @throws Exception if looking up the recovery, writing, verifying or syncing fails
  */
 @Override
 public void messageReceived(final RecoveryFileChunkRequest request, TransportChannel channel)
     throws Exception {
   // The status reference is held for the whole chunk and released by try-with-resources.
   try (RecoveriesCollection.StatusRef statusRef =
       onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
     final RecoveryStatus recoveryStatus = statusRef.status();
     final Store store = recoveryStatus.store();
     recoveryStatus.state().getTranslog().totalOperations(request.totalTranslogOps());
     final RecoveryState.Index indexState = recoveryStatus.state().getIndex();
     // Only record source-side throttling when the request carries a known value.
     if (request.sourceThrottleTimeInNanos() != RecoveryState.Index.UNKNOWN) {
       indexState.addSourceThrottling(request.sourceThrottleTimeInNanos());
     }
     IndexOutput indexOutput;
     // Position 0 is the first chunk of a file: open a new output. Later chunks reuse the
     // output that was registered under the file name.
     if (request.position() == 0) {
       indexOutput =
           recoveryStatus.openAndPutIndexOutput(request.name(), request.metadata(), store);
     } else {
       indexOutput = recoveryStatus.getOpenIndexOutput(request.name());
     }
     BytesReference content = request.content();
     // writeBytes below needs a backing array, so convert when the reference has none.
     if (!content.hasArray()) {
       content = content.toBytesArray();
     }
     RateLimiter rl = recoverySettings.rateLimiter();
     if (rl != null) {
       long bytes = bytesSinceLastPause.addAndGet(content.length());
       if (bytes > rl.getMinPauseCheckBytes()) {
         // Time to pause
         // Subtract the bytes accounted for here from the shared counter before pausing.
         bytesSinceLastPause.addAndGet(-bytes);
         long throttleTimeInNanos = rl.pause(bytes);
         indexState.addTargetThrottling(throttleTimeInNanos);
         recoveryStatus.indexShard().recoveryStats().addThrottleTime(throttleTimeInNanos);
       }
     }
     indexOutput.writeBytes(content.array(), content.arrayOffset(), content.length());
     indexState.addRecoveredBytesToFile(request.name(), content.length());
     // The file is complete when the expected length has been written or the sender flagged
     // this as the last chunk.
     if (indexOutput.getFilePointer() >= request.length() || request.lastChunk()) {
       try {
         Store.verify(indexOutput);
       } finally {
         // we are done
         indexOutput.close();
       }
       // write the checksum
       recoveryStatus.legacyChecksums().add(request.metadata());
       final String temporaryFileName = recoveryStatus.getTempNameForFile(request.name());
       assert Arrays.asList(store.directory().listAll()).contains(temporaryFileName);
       store.directory().sync(Collections.singleton(temporaryFileName));
       IndexOutput remove = recoveryStatus.removeOpenIndexOutputs(request.name());
       assert remove == null || remove == indexOutput; // remove maybe null if we got finished
     }
   }
   channel.sendResponse(TransportResponse.Empty.INSTANCE);
 }
  /**
   * Writes the values delta-compressed through a {@code BlockPackedWriter}.
   *
   * <p>{@code meta} receives the count, the {@code DELTA_COMPRESSED} marker and the data file
   * pointer at which the encoded values start. {@code data} receives the packed-ints version and
   * the block size, followed by the output of the block writer fed with {@code longValue()} of
   * every value.
   *
   * @param values values to write
   * @param count number recorded in the metadata
   * @throws IOException if writing to either output fails
   */
  private void addDeltaCompressed(Iterable<Number> values, int count) throws IOException {
    meta.writeVInt(count);
    meta.writeByte(DELTA_COMPRESSED);
    final long dataStart = data.getFilePointer();
    meta.writeLong(dataStart);

    // Header that precedes the packed blocks.
    data.writeVInt(PackedInts.VERSION_CURRENT);
    data.writeVInt(BLOCK_SIZE);

    final BlockPackedWriter blockWriter = new BlockPackedWriter(data, BLOCK_SIZE);
    for (Number value : values) {
      final long asLong = value.longValue();
      blockWriter.add(asLong);
    }
    blockWriter.finish();
  }
  // Encodes the common values in a table, and the remaining values as exceptions using INDIRECT.
  // The exceptions should not be accessed very often, since those values are uncommon.
  //
  // Writes count, the PATCHED_TABLE marker and the data file pointer to meta, then the table
  // for the common values, then the field number followed by the nested INDIRECT structure.
  private void addPatchedTable(
      FieldInfo field,
      final Iterable<Number> values,
      final int numCommonValues,
      int commonValuesCount,
      int count,
      final NormMap uniqueValues)
      throws IOException {
    meta.writeVInt(count);
    meta.writeByte(PATCHED_TABLE);
    final long tableStart = data.getFilePointer();
    meta.writeLong(tableStart);

    assert numCommonValues == 3 || numCommonValues == 15;
    writeTable(
        values, fastestFormatAndBits(numCommonValues), count, uniqueValues, numCommonValues);

    // Everything that is not one of the common values goes into the exception table.
    final int exceptionCount = count - commonValuesCount;
    meta.writeVInt(field.number);
    addIndirect(field, values, exceptionCount, uniqueValues, numCommonValues);
  }
  // Encodes only the uncommon values in a sparse bitset.
  // Access is constant time, and the common case is predictable.
  // Exceptions nest either to CONST (if there are only 2 values) or to INDIRECT (if there are
  // more than 2 values).
  private void addPatchedBitset(
      FieldInfo field, final Iterable<Number> values, int count, NormMap uniqueValues)
      throws IOException {
    // freqs[0] is used as the frequency of the common value.
    final int uncommonCount = count - uniqueValues.freqs[0];

    meta.writeVInt(uncommonCount);
    meta.writeByte(PATCHED_BITSET);
    final long bitsetStart = data.getFilePointer();
    meta.writeLong(bitsetStart);

    // write docs with value
    writeDocsWithValue(values, uniqueValues, 0);

    // write exceptions: only two cases make sense
    // bpv = 1 (folded into sparse bitset already)
    // bpv > 1 (add indirect exception table)
    meta.writeVInt(field.number);
    if (uniqueValues.size != 2) {
      // exception table
      addIndirect(field, values, count, uniqueValues, 0);
    } else {
      // special case: implicit in bitset
      addConstant(uniqueValues.values[1]);
    }
  }
 /** Returns the current file pointer reported by {@code tempOut}. */
 @Override
 public long getFilePointer() {
   final long position = tempOut.getFilePointer();
   return position;
 }
Exemplo n.º 18
0
 /** Returns the current file pointer reported by {@code wrapped}. */
 @Override
 public long getFilePointer() {
   final long position = wrapped.getFilePointer();
   return position;
 }
 /** Returns the current file pointer reported by {@code delegate}. */
 @Override
 public long getFilePointer() {
   final long position = delegate.getFilePointer();
   return position;
 }
Exemplo n.º 20
0
 /**
  * Starts a new document: first calls {@code closeDocument()}, then records the current file
  * pointer of {@code tvd} as the new document's pointer.
  *
  * @throws IOException if {@code closeDocument()} fails
  */
 public final void openDocument() throws IOException {
   closeDocument();
   final long documentStart = tvd.getFilePointer();
   currentDocPointer = documentStart;
 }