Example #1
  private void writeDoc() throws IOException {
    if (isFieldOpen())
      throw new IllegalStateException("Field is still open while writing document");
    // System.out.println("Writing doc pointer: " + currentDocPointer);
    // write document index record
    tvx.writeLong(currentDocPointer);

    // write document data record
    final int size = fields.size();

    // write the number of fields
    tvd.writeVInt(size);

    // write field numbers
    for (int i = 0; i < size; i++) {
      TVField field = (TVField) fields.elementAt(i);
      tvd.writeVInt(field.number);
    }

    // write field pointers
    long lastFieldPointer = 0;
    for (int i = 0; i < size; i++) {
      TVField field = (TVField) fields.elementAt(i);
      tvd.writeVLong(field.tvfPointer - lastFieldPointer);
      lastFieldPointer = field.tvfPointer;
    }
    // System.out.println("After writing doc pointer: " + tvx.getFilePointer());
  }
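
The pointer loop above writes deltas rather than absolute positions because Lucene's VLong spends one byte per 7 payload bits, so small gaps between successive pointers encode in a byte or two. A quick sketch of the size arithmetic, using a hypothetical helper that mirrors the 7-bit varint scheme:

static int vLongByteLength(long v) {
  // one byte per 7 payload bits, high bit used as a continuation flag
  int bytes = 1;
  while ((v >>>= 7) != 0) {
    bytes++;
  }
  return bytes;
}
// e.g. pointers {1_000_000, 1_000_050} -> deltas {1_000_000, 50};
// vLongByteLength(50) == 1, while the absolute 1_000_050 would take 3 bytes.
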
  // encodes values as a sparse array: keys[] and values[]
  // access is O(log N) where N = keys.length (slow!)
  // so this is only appropriate as an exception table for the PATCHED encodings,
  // or when the common value is 0 (which won't be accessed by searching)
  private void addIndirect(
      FieldInfo field,
      final Iterable<Number> values,
      int count,
      final NormMap uniqueValues,
      final int minOrd)
      throws IOException {
    int commonCount = uniqueValues.freqs[minOrd];

    meta.writeVInt(count - commonCount);
    meta.writeByte(INDIRECT);
    meta.writeLong(data.getFilePointer());

    // write docs with value
    writeDocsWithValue(values, uniqueValues, minOrd);

    // write actual values
    writeNormsField(
        field,
        new Iterable<Number>() {
          @Override
          public Iterator<Number> iterator() {
            return new FilterIterator<Number, Number>(values.iterator()) {
              @Override
              protected boolean predicateFunction(Number value) {
                return uniqueValues.ord(value.byteValue()) > minOrd;
              }
            };
          }
        },
        1);
  }
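
As the comment notes, INDIRECT trades space for lookup speed: reading a value means binary-searching the sorted doc list. A minimal reader-side sketch under that assumption (docsWithValue/exceptionValues are illustrative names for the decoded arrays, not Lucene's reader API):

static long lookup(long[] docsWithValue, long[] exceptionValues, long commonValue, int doc) {
  // O(log N) probe of the sorted sparse keys; misses fall back to the common value
  int idx = java.util.Arrays.binarySearch(docsWithValue, doc);
  return idx >= 0 ? exceptionValues[idx] : commonValue;
}
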
  private void addUncompressed(Iterable<Number> values, int count) throws IOException {
    meta.writeVInt(count);
    meta.writeByte(UNCOMPRESSED); // uncompressed byte[]
    meta.writeLong(data.getFilePointer());
    for (Number nv : values) {
      data.writeByte(nv.byteValue());
    }
  }
  private void addTableCompressed(
      Iterable<Number> values, FormatAndBits compression, int count, NormMap uniqueValues)
      throws IOException {
    meta.writeVInt(count);
    meta.writeByte(TABLE_COMPRESSED); // table-compressed
    meta.writeLong(data.getFilePointer());

    writeTable(values, compression, count, uniqueValues, uniqueValues.size);
  }
Example #5
  final void finishCommit(Directory dir) throws IOException {
    if (pendingSegnOutput == null) throw new IllegalStateException("prepareCommit was not called");
    boolean success = false;
    try {
      pendingSegnOutput.finishCommit();
      pendingSegnOutput.close();
      pendingSegnOutput = null;
      success = true;
    } finally {
      if (!success) rollbackCommit(dir);
    }

    // NOTE: if we crash here, we have left a segments_N
    // file in the directory in a possibly corrupt state (if
    // some bytes made it to stable storage and others
    // didn't).  But, the segments_N file includes checksum
    // at the end, which should catch this case.  So when a
    // reader tries to read it, it will throw a
    // CorruptIndexException, which should cause the retry
    // logic in SegmentInfos to kick in and load the last
    // good (previous) segments_N-1 file.

    final String fileName =
        IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);
    success = false;
    try {
      dir.sync(Collections.singleton(fileName));
      success = true;
    } finally {
      if (!success) {
        try {
          dir.deleteFile(fileName);
        } catch (Throwable t) {
          // Suppress so we keep throwing the original exception
        }
      }
    }

    lastGeneration = generation;

    try {
      IndexOutput genOutput = dir.createOutput(IndexFileNames.SEGMENTS_GEN);
      try {
        genOutput.writeInt(FORMAT_LOCKLESS);
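        // the generation is intentionally written twice: a reader only trusts
        // segments.gen when both copies match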
        genOutput.writeLong(generation);
        genOutput.writeLong(generation);
      } finally {
        genOutput.close();
      }
    } catch (ThreadInterruptedException t) {
      throw t;
    } catch (Throwable t) {
      // It's OK if we fail to write this file since it's
      // used only as one of the retry fallbacks.
    }
  }
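
The NOTE above relies on the trailing checksum in segments_N to turn a torn write into a detectable CorruptIndexException. A self-contained illustration of that idea with plain java.util.zip.CRC32 (not Lucene's actual CodecUtil footer format):

// A file whose last 8 bytes hold the CRC32 of everything before them: a crash
// that persists only some of the bytes will almost certainly fail this check,
// letting the reader fall back to the previous good generation.
static boolean checksumMatches(byte[] file) {
  if (file.length < 8) return false;
  java.util.zip.CRC32 crc = new java.util.zip.CRC32();
  crc.update(file, 0, file.length - 8);
  long stored = java.nio.ByteBuffer.wrap(file, file.length - 8, 8).getLong();
  return crc.getValue() == stored;
}
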
  private void addDeltaCompressed(Iterable<Number> values, int count) throws IOException {
    meta.writeVInt(count);
    meta.writeByte(DELTA_COMPRESSED); // delta-compressed
    meta.writeLong(data.getFilePointer());
    data.writeVInt(PackedInts.VERSION_CURRENT);
    data.writeVInt(BLOCK_SIZE);

    final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
    for (Number nv : values) {
      writer.add(nv.longValue());
    }
    writer.finish();
  }
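
BlockPackedWriter bit-packs each fixed-size block relative to a per-block reference value, which is why norms that cluster around one magnitude compress well. A rough sketch of the arithmetic behind it (not the exact on-disk layout):

// For the block {100, 102, 101, 103}, store the minimum (100) once and pack
// only the offsets {0, 2, 1, 3}: 2 bits per value instead of 64.
static int bitsPerValue(long[] block) {
  long min = java.util.Arrays.stream(block).min().getAsLong();
  long maxDelta = java.util.Arrays.stream(block).map(v -> v - min).max().getAsLong();
  return maxDelta == 0 ? 0 : 64 - Long.numberOfLeadingZeros(maxDelta);
}
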
Example #7
  @Override
  public void write(
      Directory directory,
      SegmentInfo segmentInfo,
      String segmentSuffix,
      FieldInfos infos,
      IOContext context)
      throws IOException {
    final String fileName =
        IndexFileNames.segmentFileName(
            segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
    try (IndexOutput output = directory.createOutput(fileName, context)) {
      CodecUtil.writeHeader(
          output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
      output.writeVInt(infos.size());
      for (FieldInfo fi : infos) {
        IndexOptions indexOptions = fi.getIndexOptions();
        byte bits = 0x0;
        if (fi.hasVectors()) bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
        if (fi.omitsNorms()) bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
        if (fi.hasPayloads()) bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
        if (fi.getIndexOptions() != IndexOptions.NONE) {
          bits |= Lucene46FieldInfosFormat.IS_INDEXED;
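          // payloads can only be stored when positions are indexed, which the
          // assert below enforces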
          assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0
              || !fi.hasPayloads();
          if (indexOptions == IndexOptions.DOCS) {
            bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
          } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
            bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
          } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
            bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
          }
        }
        output.writeString(fi.name);
        output.writeVInt(fi.number);
        output.writeByte(bits);

        // pack the DV types in one byte
        final byte dv = docValuesByte(fi.getDocValuesType());
        final byte nrm = docValuesByte(fi.hasNorms() ? DocValuesType.NUMERIC : DocValuesType.NONE);
        assert (dv & (~0xF)) == 0 && (nrm & (~0xF)) == 0;
        byte val = (byte) (0xff & ((nrm << 4) | dv));
        output.writeByte(val);
        output.writeLong(fi.getDocValuesGen());
        output.writeStringStringMap(fi.attributes());
      }
      CodecUtil.writeFooter(output);
    }
  }
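
The "pack the DV types in one byte" step is plain nibble packing: two 4-bit codes share a byte, norms in the high nibble and doc values in the low one. A tiny round-trip sketch of the arithmetic:

static byte pack(byte nrm, byte dv) {
  assert (dv & ~0xF) == 0 && (nrm & ~0xF) == 0; // each code must fit in 4 bits
  return (byte) ((nrm << 4) | dv);
}

static byte unpackDv(byte packed) {
  return (byte) (packed & 0xF); // low nibble
}

static byte unpackNrm(byte packed) {
  return (byte) ((packed >>> 4) & 0xF); // high nibble
}
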
  private void writeDocsWithValue(final Iterable<Number> values, NormMap uniqueValues, int minOrd)
      throws IOException {
    data.writeLong(uniqueValues.values[minOrd]);
    data.writeVInt(PackedInts.VERSION_CURRENT);
    data.writeVInt(BLOCK_SIZE);

    // write docs with value
    final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
    int doc = 0;
    for (Number n : values) {
      int ord = uniqueValues.ord(n.byteValue());
      if (ord > minOrd) {
        writer.add(doc);
      }
      doc++;
    }
    writer.finish();
  }
  // encodes common values in a table, and the rest of the values as exceptions using INDIRECT.
  // the exceptions should not be accessed very often, since the values are uncommon
  private void addPatchedTable(
      FieldInfo field,
      final Iterable<Number> values,
      final int numCommonValues,
      int commonValuesCount,
      int count,
      final NormMap uniqueValues)
      throws IOException {
    meta.writeVInt(count);
    meta.writeByte(PATCHED_TABLE);
    meta.writeLong(data.getFilePointer());

    // 3 or 15 common values leave exactly one spare ordinal to flag an exception,
    // so table entries fit in 2 or 4 bits per value
    assert numCommonValues == 3 || numCommonValues == 15;
    FormatAndBits compression = fastestFormatAndBits(numCommonValues);

    writeTable(values, compression, count, uniqueValues, numCommonValues);

    meta.writeVInt(field.number);
    addIndirect(field, values, count - commonValuesCount, uniqueValues, numCommonValues);
  }
  private void insertData(CoherenceDirectory dir, String fileName) throws IOException {
    byte[] test = new byte[] {1, 2, 3, 4, 5, 6, 7, 8};
    IndexOutput indexOutput = dir.createOutput(fileName);
    indexOutput.writeBytes(new byte[] {2, 4, 6, 7, 8}, 5);
    indexOutput.writeInt(-1);
    indexOutput.writeLong(10);
    indexOutput.writeInt(0);
    indexOutput.writeInt(0);
    indexOutput.writeBytes(test, 8);
    indexOutput.writeBytes(test, 5);

    indexOutput.seek(0);
    indexOutput.writeByte((byte) 8);
    if (dir.getBucketSize() > 4) {
      indexOutput.seek(2);
      indexOutput.writeBytes(new byte[] {1, 2}, 2);
    }

    indexOutput.close();
  }
  private void insertData(ByteBufferDirectory dir, int bufferSizeInBytes) throws IOException {
    byte[] test = new byte[] {1, 2, 3, 4, 5, 6, 7, 8};
    IndexOutput indexOutput = dir.createOutput("value1", IOContext.DEFAULT);
    indexOutput.writeBytes(new byte[] {2, 4, 6, 7, 8}, 5);
    indexOutput.writeInt(-1);
    indexOutput.writeLong(10);
    indexOutput.writeInt(0);
    indexOutput.writeInt(0);
    indexOutput.writeBytes(test, 8);
    indexOutput.writeBytes(test, 5);

    indexOutput.seek(0);
    indexOutput.writeByte((byte) 8);
    if (bufferSizeInBytes > 4) {
      indexOutput.seek(2);
      indexOutput.writeBytes(new byte[] {1, 2}, 2);
    }

    indexOutput.close();
  }
  // encodes only uncommon values in a sparse bitset
  // access is constant time, and the common case is predictable
  // exceptions nest either to CONST (if there are only 2 values) or INDIRECT (if there are > 2 values)
  private void addPatchedBitset(
      FieldInfo field, final Iterable<Number> values, int count, NormMap uniqueValues)
      throws IOException {
    int commonCount = uniqueValues.freqs[0];

    meta.writeVInt(count - commonCount);
    meta.writeByte(PATCHED_BITSET);
    meta.writeLong(data.getFilePointer());

    // write docs with value
    writeDocsWithValue(values, uniqueValues, 0);

    // write exceptions: only two cases make sense
    // bpv = 1 (folded into sparse bitset already)
    // bpv > 1 (add indirect exception table)
    meta.writeVInt(field.number);
    if (uniqueValues.size == 2) {
      // special case: implicit in bitset
      addConstant(uniqueValues.values[1]);
    } else {
      // exception table
      addIndirect(field, values, count, uniqueValues, 0);
    }
  }
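
The payoff of PATCHED_BITSET is constant-time reads: membership in the sparse set decides between the common value and the nested exception encoding. A hypothetical reader-side sketch of the two-value case (where the nested encoding is CONST), with java.util.BitSet standing in for the decoded doc list:

static byte norm(java.util.BitSet exceptions, byte commonValue, byte exceptionValue, int doc) {
  // O(1) either way: no search, just a bit test
  return exceptions.get(doc) ? exceptionValue : commonValue;
}
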
  protected void writeTrailer(long dirStart) throws IOException {
    out.seek(CodecUtil.headerLength(CODEC_NAME));
    out.writeLong(dirStart);
  }
  protected void writeHeader(IndexOutput out) throws IOException {
    CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
    // Placeholder for dir offset
    out.writeLong(0);
  }
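
writeHeader and writeTrailer form a back-patching pair: eight bytes are reserved up front, and once the directory's position is known the writer seeks back and fills them in. The same pattern with plain RandomAccessFile (illustrative; the real code reserves the slot right after the codec header and seeks back via out.seek(CodecUtil.headerLength(CODEC_NAME))):

try (java.io.RandomAccessFile f = new java.io.RandomAccessFile("terms.bin", "rw")) {
  f.writeLong(0L); // placeholder for dirStart, patched below
  // ... write the variable-length body, then note where the directory begins ...
  long dirStart = f.getFilePointer();
  // ... write the directory itself ...
  f.seek(0);
  f.writeLong(dirStart); // back-patch the reserved slot
}
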
  @Override
  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    // we pull this before the seed intentionally, because it's not consumed at runtime
    // (the skipInterval is written into the postings header)
    int skipInterval = _TestUtil.nextInt(seedRandom, 2, 10);

    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: skipInterval=" + skipInterval);
    }

    final long seed = seedRandom.nextLong();

    if (LuceneTestCase.VERBOSE) {
      System.out.println(
          "MockRandomCodec: writing to seg="
              + state.segmentName
              + " formatID="
              + state.segmentSuffix
              + " seed="
              + seed);
    }

    final String seedFileName =
        IndexFileNames.segmentFileName(state.segmentName, state.segmentSuffix, SEED_EXT);
    final IndexOutput out = state.directory.createOutput(seedFileName, state.context);
    try {
      out.writeLong(seed);
    } finally {
      out.close();
    }

    final Random random = new Random(seed);

    random.nextInt(); // consume a random int for buffer size

    PostingsWriterBase postingsWriter;
    if (random.nextBoolean()) {
      postingsWriter = new SepPostingsWriter(state, new MockIntStreamFactory(random), skipInterval);
    } else {
      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: writing Standard postings");
      }
      postingsWriter = new Lucene40PostingsWriter(state, skipInterval);
    }

    if (random.nextBoolean()) {
      final int totTFCutoff = _TestUtil.nextInt(random, 1, 20);
      if (LuceneTestCase.VERBOSE) {
        System.out.println(
            "MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff);
      }
      postingsWriter = new PulsingPostingsWriter(totTFCutoff, postingsWriter);
    }

    final FieldsConsumer fields;

    if (random.nextBoolean()) {
      // Use BlockTree terms dict

      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: writing BlockTree terms dict");
      }

      // TODO: would be nice to allow 1 but this is very
      // slow to write
      final int minTermsInBlock = _TestUtil.nextInt(random, 2, 100);
      final int maxTermsInBlock = Math.max(2, (minTermsInBlock - 1) * 2 + random.nextInt(100));

      boolean success = false;
      try {
        fields = new BlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
        success = true;
      } finally {
        if (!success) {
          postingsWriter.close();
        }
      }
    } else {

      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: writing Block terms dict");
      }

      boolean success = false;

      final TermsIndexWriterBase indexWriter;
      try {
        if (random.nextBoolean()) {
          state.termIndexInterval = _TestUtil.nextInt(random, 1, 100);
          if (LuceneTestCase.VERBOSE) {
            System.out.println(
                "MockRandomCodec: fixed-gap terms index (tii=" + state.termIndexInterval + ")");
          }
          indexWriter = new FixedGapTermsIndexWriter(state);
        } else {
          final VariableGapTermsIndexWriter.IndexTermSelector selector;
          final int n2 = random.nextInt(3);
          if (n2 == 0) {
            final int tii = _TestUtil.nextInt(random, 1, 100);
            selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii);
            if (LuceneTestCase.VERBOSE) {
              System.out.println("MockRandomCodec: variable-gap terms index (tii=" + tii + ")");
            }
          } else if (n2 == 1) {
            final int docFreqThresh = _TestUtil.nextInt(random, 2, 100);
            final int tii = _TestUtil.nextInt(random, 1, 100);
            selector =
                new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii);
          } else {
            final long seed2 = random.nextLong();
            final int gap = _TestUtil.nextInt(random, 2, 40);
            if (LuceneTestCase.VERBOSE) {
              System.out.println("MockRandomCodec: random-gap terms index (max gap=" + gap + ")");
            }
            selector =
                new VariableGapTermsIndexWriter.IndexTermSelector() {
                  final Random rand = new Random(seed2);

                  @Override
                  public boolean isIndexTerm(BytesRef term, TermStats stats) {
                    return rand.nextInt(gap) == gap / 2;
                  }

                  @Override
                  public void newField(FieldInfo fieldInfo) {}
                };
          }
          indexWriter = new VariableGapTermsIndexWriter(state, selector);
        }
        success = true;
      } finally {
        if (!success) {
          postingsWriter.close();
        }
      }

      success = false;
      try {
        fields = new BlockTermsWriter(indexWriter, state, postingsWriter);
        success = true;
      } finally {
        if (!success) {
          try {
            postingsWriter.close();
          } finally {
            indexWriter.close();
          }
        }
      }
    }

    return fields;
  }
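
A recurring idiom in this method (and in finishCommit earlier) is the success flag: cleanup runs only on the failure path, so a successful build hands ownership of the resource to the new object, while a failed one closes what was opened. The skeleton, with buildConsumer/postingsWriter as placeholder names:

// skeleton of the idiom; buildConsumer and postingsWriter are placeholders
boolean success = false;
try {
  fields = buildConsumer(postingsWriter); // may throw
  success = true;
} finally {
  if (!success) {
    postingsWriter.close(); // failure path only: release what we opened
  }
}
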
Example #16
  /** Writes the index file trailer. */
  private void writeIndexTrailer(IndexOutput indexOut, long dirStart) throws IOException {
    indexOut.writeLong(dirStart);
  }
  private void addConstant(byte constant) throws IOException {
    meta.writeVInt(0);
    meta.writeByte(CONST_COMPRESSED);
    meta.writeLong(constant);
  }
  @Override
  public void writeLong(long i) throws IOException {
    delegate.writeLong(i);
  }
Example #19
  private void write(Directory directory) throws IOException {

    long nextGeneration = getNextPendingGeneration();
    String segmentFileName =
        IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, "", nextGeneration);

    // Always advance the generation on write:
    generation = nextGeneration;

    IndexOutput segnOutput = null;
    boolean success = false;

    try {
      segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
      CodecUtil.writeIndexHeader(
          segnOutput,
          "segments",
          VERSION_CURRENT,
          StringHelper.randomId(),
          Long.toString(nextGeneration, Character.MAX_RADIX));
      segnOutput.writeVInt(Version.LATEST.major);
      segnOutput.writeVInt(Version.LATEST.minor);
      segnOutput.writeVInt(Version.LATEST.bugfix);

      segnOutput.writeLong(version);
      segnOutput.writeInt(counter); // write counter
      segnOutput.writeInt(size());

      if (size() > 0) {

        Version minSegmentVersion = null;

        // We do a separate loop up front so we can write the minSegmentVersion before
        // any SegmentInfo; this makes it cleaner to throw IndexFormatTooOldExc at read time:
        for (SegmentCommitInfo siPerCommit : this) {
          Version segmentVersion = siPerCommit.info.getVersion();
          if (minSegmentVersion == null || segmentVersion.onOrAfter(minSegmentVersion) == false) {
            minSegmentVersion = segmentVersion;
          }
        }

        segnOutput.writeVInt(minSegmentVersion.major);
        segnOutput.writeVInt(minSegmentVersion.minor);
        segnOutput.writeVInt(minSegmentVersion.bugfix);
      }

      // write infos
      for (SegmentCommitInfo siPerCommit : this) {
        SegmentInfo si = siPerCommit.info;
        segnOutput.writeString(si.name);
        byte[] segmentID = si.getId();
        // TODO: remove this in lucene 6, we don't need to include 4.x segments in commits anymore
        if (segmentID == null) {
          segnOutput.writeByte((byte) 0);
        } else {
          if (segmentID.length != StringHelper.ID_LENGTH) {
            throw new IllegalStateException(
                "cannot write segment: invalid id segment="
                    + si.name
                    + "id="
                    + StringHelper.idToString(segmentID));
          }
          segnOutput.writeByte((byte) 1);
          segnOutput.writeBytes(segmentID, segmentID.length);
        }
        segnOutput.writeString(si.getCodec().getName());
        segnOutput.writeLong(siPerCommit.getDelGen());
        int delCount = siPerCommit.getDelCount();
        if (delCount < 0 || delCount > si.maxDoc()) {
          throw new IllegalStateException(
              "cannot write segment: invalid maxDoc segment="
                  + si.name
                  + " maxDoc="
                  + si.maxDoc()
                  + " delCount="
                  + delCount);
        }
        segnOutput.writeInt(delCount);
        segnOutput.writeLong(siPerCommit.getFieldInfosGen());
        segnOutput.writeLong(siPerCommit.getDocValuesGen());
        segnOutput.writeSetOfStrings(siPerCommit.getFieldInfosFiles());
        final Map<Integer, Set<String>> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles();
        segnOutput.writeInt(dvUpdatesFiles.size());
        for (Entry<Integer, Set<String>> e : dvUpdatesFiles.entrySet()) {
          segnOutput.writeInt(e.getKey());
          segnOutput.writeSetOfStrings(e.getValue());
        }
      }
      segnOutput.writeMapOfStrings(userData);
      CodecUtil.writeFooter(segnOutput);
      segnOutput.close();
      directory.sync(Collections.singleton(segmentFileName));
      success = true;
    } finally {
      if (success) {
        pendingCommit = true;
      } else {
        // We hit an exception above; try to close the file
        // but suppress any exception:
        IOUtils.closeWhileHandlingException(segnOutput);
        // Try not to leave a truncated segments_N file in
        // the index:
        IOUtils.deleteFilesIgnoringExceptions(directory, segmentFileName);
      }
    }
  }
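
write() encodes the generation in base 36 twice: in the index header via Long.toString(nextGeneration, Character.MAX_RADIX) and, through fileNameFromGeneration, in the file name itself. A hypothetical stand-in for that helper to show the scheme:

// e.g. gen 1 -> "pending_segments_1", gen 35 -> "pending_segments_z",
// gen 36 -> "pending_segments_10" (Character.MAX_RADIX == 36)
static String fileNameFromGeneration(String base, long gen) {
  return gen == 0 ? base : base + "_" + Long.toString(gen, Character.MAX_RADIX);
}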