@Override
public List<LuceneSegmentInputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
  Configuration configuration = context.getConfiguration();

  LuceneStorageConfiguration lucene2SeqConfiguration = new LuceneStorageConfiguration(configuration);

  List<LuceneSegmentInputSplit> inputSplits = new ArrayList<>();

  List<Path> indexPaths = lucene2SeqConfiguration.getIndexPaths();
  for (Path indexPath : indexPaths) {
    ReadOnlyFileSystemDirectory directory =
        new ReadOnlyFileSystemDirectory(FileSystem.get(configuration), indexPath, false, configuration);
    SegmentInfos segmentInfos = new SegmentInfos();
    segmentInfos.read(directory);

    for (SegmentCommitInfo segmentInfo : segmentInfos) {
      LuceneSegmentInputSplit inputSplit =
          new LuceneSegmentInputSplit(indexPath, segmentInfo.info.name, segmentInfo.sizeInBytes());
      inputSplits.add(inputSplit);
      LOG.info("Created {} byte input split for index '{}' segment {}",
          segmentInfo.sizeInBytes(), indexPath.toUri(), segmentInfo.info.name);
    }
  }

  return inputSplits;
}
/* Removes any BufferedDeletes that we no longer need to
 * store because all segments in the index have had the
 * deletes applied. */
public synchronized void prune(SegmentInfos segmentInfos) {
  assert checkDeleteStats();

  long minGen = Long.MAX_VALUE;
  for (SegmentCommitInfo info : segmentInfos) {
    minGen = Math.min(info.getBufferedDeletesGen(), minGen);
  }

  if (infoStream.isEnabled("BD")) {
    infoStream.message("BD", "prune sis=" + segmentInfos + " minGen=" + minGen + " packetCount=" + updates.size());
  }

  final int limit = updates.size();
  for (int delIDX = 0; delIDX < limit; delIDX++) {
    if (updates.get(delIDX).delGen() >= minGen) {
      prune(delIDX);
      assert checkDeleteStats();
      return;
    }
  }

  // All deletes pruned
  prune(limit);
  assert !any();
  assert checkDeleteStats();
}
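/*
 * Hypothetical, self-contained sketch (invented names and values, not the Lucene implementation)
 * of the pruning rule above: a packet can be dropped once its delGen is below the smallest
 * buffered-deletes generation of any live segment, because every segment has already seen it.
 */
static int firstPacketToKeep(long[] segmentGens, long[] packetDelGens) {
  long minGen = Long.MAX_VALUE;
  for (long gen : segmentGens) {
    minGen = Math.min(gen, minGen);          // smallest generation still needed by any segment
  }
  for (int i = 0; i < packetDelGens.length; i++) {
    if (packetDelGens[i] >= minGen) {
      return i;                              // everything before index i can be pruned
    }
  }
  return packetDelGens.length;               // all packets applied everywhere: prune them all
}
// e.g. firstPacketToKeep(new long[] {5, 7, 9}, new long[] {2, 4, 5, 8}) == 2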
/** Clones {@code this}, optionally also cloning the {@link SegmentInfo}. */
SegmentCommitInfo clone(boolean cloneSegmentInfo) {
  SegmentInfo otherInfo;
  if (cloneSegmentInfo) {
    otherInfo = info.clone();
  } else {
    otherInfo = info;
  }
  SegmentCommitInfo other = new SegmentCommitInfo(otherInfo, delCount, delGen, fieldInfosGen, docValuesGen);
  // Not clear that we need to carry over nextWriteDelGen
  // (i.e. do we ever clone after a failed write and
  // before the next successful write?), but just do it to
  // be safe:
  other.nextWriteDelGen = nextWriteDelGen;
  other.nextWriteFieldInfosGen = nextWriteFieldInfosGen;
  other.nextWriteDocValuesGen = nextWriteDocValuesGen;

  // deep clone
  for (Entry<Long, Set<String>> e : genUpdatesFiles.entrySet()) {
    other.genUpdatesFiles.put(e.getKey(), new HashSet<>(e.getValue()));
  }

  // deep clone
  for (Entry<Integer, Set<String>> e : dvUpdatesFiles.entrySet()) {
    other.dvUpdatesFiles.put(e.getKey(), new HashSet<>(e.getValue()));
  }

  other.fieldInfosFiles.addAll(fieldInfosFiles);

  return other;
}
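/*
 * Hypothetical sketch (not from the original source) of why clone(boolean) deep-copies the
 * per-generation file sets above: copying only the outer map would leave both copies sharing
 * the same inner Set instances, so a mutation through one would be visible through the other.
 * All class names and file strings below are invented for illustration.
 */
import java.util.*;

class DeepCopyDemo {
  public static void main(String[] args) {
    Map<Long, Set<String>> original = new HashMap<>();
    original.put(1L, new HashSet<>(Collections.singleton("_0_1.del")));

    // Shallow copy: the inner Set is shared with the original.
    Map<Long, Set<String>> shallow = new HashMap<>(original);

    // Deep copy: a fresh Set per generation, mirroring the loops in clone(boolean).
    Map<Long, Set<String>> deep = new HashMap<>();
    for (Map.Entry<Long, Set<String>> e : original.entrySet()) {
      deep.put(e.getKey(), new HashSet<>(e.getValue()));
    }

    shallow.get(1L).add("_0_2.del");
    System.out.println(original.get(1L).size()); // 2: the shared Set leaked the mutation
    System.out.println(deep.get(1L).size());     // 1: the deep copy is isolated
  }
}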
List<SegmentCommitInfo> createBackupSegmentInfos() {
  final List<SegmentCommitInfo> list = new ArrayList<>(size());
  for (final SegmentCommitInfo info : this) {
    assert info.info.getCodec() != null;
    list.add(info.clone());
  }
  return list;
}
@Override
public int compare(SegmentCommitInfo si1, SegmentCommitInfo si2) {
  final long cmp = si1.getBufferedDeletesGen() - si2.getBufferedDeletesGen();
  if (cmp > 0) {
    return 1;
  } else if (cmp < 0) {
    return -1;
  } else {
    return 0;
  }
}
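/*
 * Aside, a hedged sketch rather than a change to the comparator above: Long.compare expresses
 * the same ordering by buffered-deletes generation without the (theoretical) overflow risk of
 * subtracting two longs. The constant name is invented; java.util.Comparator is assumed imported.
 */
static final Comparator<SegmentCommitInfo> BY_DEL_GEN =
    (si1, si2) -> Long.compare(si1.getBufferedDeletesGen(), si2.getBufferedDeletesGen());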
/** Returns a readable description of this segment list. */
@Override
public String toString() {
  StringBuilder buffer = new StringBuilder();
  buffer.append(getSegmentsFileName()).append(": ");
  final int count = size();
  for (int i = 0; i < count; i++) {
    if (i > 0) {
      buffer.append(' ');
    }
    final SegmentCommitInfo info = info(i);
    buffer.append(info.toString(0));
  }
  return buffer.toString();
}
/**
 * Returns all file names referenced by SegmentInfo. The returned collection is recomputed on
 * each invocation.
 */
public Collection<String> files(boolean includeSegmentsFile) throws IOException {
  HashSet<String> files = new HashSet<>();
  if (includeSegmentsFile) {
    final String segmentFileName = getSegmentsFileName();
    if (segmentFileName != null) {
      files.add(segmentFileName);
    }
  }
  final int size = size();
  for (int i = 0; i < size; i++) {
    final SegmentCommitInfo info = info(i);
    files.addAll(info.files());
  }
  return files;
}
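/*
 * Hypothetical usage sketch (method name invented, not part of the original source): summing the
 * on-disk size of every file referenced by a commit. Beyond files(true) above, only
 * Directory.fileLength(String) is assumed.
 */
long commitSizeInBytes(SegmentInfos infos, Directory dir) throws IOException {
  long total = 0;
  for (String fileName : infos.files(true)) {   // include the segments_N file itself
    total += dir.fileLength(fileName);
  }
  return total;
}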
/** Returns a copy of this instance, also copying each SegmentInfo. */
@Override
public SegmentInfos clone() {
  try {
    final SegmentInfos sis = (SegmentInfos) super.clone();
    // deep clone, first recreate all collections:
    sis.segments = new ArrayList<>(size());
    for (final SegmentCommitInfo info : this) {
      assert info.info.getCodec() != null;
      // don't directly access segments, use add method!!!
      sis.add(info.clone());
    }
    sis.userData = new HashMap<>(userData);
    return sis;
  } catch (CloneNotSupportedException e) {
    throw new RuntimeException("should not happen", e);
  }
}
/**
 * Returns the byte size of the provided {@link SegmentCommitInfo}, pro-rated by the percentage
 * of non-deleted documents.
 */
protected long size(SegmentCommitInfo info) throws IOException {
  long byteSize = info.sizeInBytes();
  int delCount = writer.get().numDeletedDocs(info);
  double delRatio =
      (info.info.getDocCount() <= 0 ? 0.0f : ((float) delCount / (float) info.info.getDocCount()));
  assert delRatio <= 1.0;
  return (info.info.getDocCount() <= 0 ? byteSize : (long) (byteSize * (1.0 - delRatio)));
}
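// Worked example of the pro-rating above (all values invented for illustration):
//   byteSize = 100_000_000 bytes, docCount = 1_000_000, delCount = 250_000
//   delRatio = 250_000 / 1_000_000 = 0.25
//   pro-rated size = 100_000_000 * (1.0 - 0.25) = 75_000_000 bytes
// i.e. a 100 MB segment with 25% deleted documents is scored as roughly 75 MB.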
/**
 * Resolves the buffered deleted Term/Query/docIDs into actual deleted docIDs in the liveDocs
 * MutableBits for each SegmentReader.
 */
public synchronized ApplyDeletesResult applyDeletesAndUpdates(
    IndexWriter.ReaderPool readerPool, List<SegmentCommitInfo> infos) throws IOException {
  final long t0 = System.currentTimeMillis();

  if (infos.size() == 0) {
    return new ApplyDeletesResult(false, nextGen++, null);
  }

  assert checkDeleteStats();

  if (!any()) {
    if (infoStream.isEnabled("BD")) {
      infoStream.message("BD", "applyDeletes: no deletes; skipping");
    }
    return new ApplyDeletesResult(false, nextGen++, null);
  }

  if (infoStream.isEnabled("BD")) {
    infoStream.message("BD", "applyDeletes: infos=" + infos + " packetCount=" + updates.size());
  }

  final long gen = nextGen++;

  List<SegmentCommitInfo> infos2 = new ArrayList<SegmentCommitInfo>();
  infos2.addAll(infos);
  Collections.sort(infos2, sortSegInfoByDelGen);

  CoalescedUpdates coalescedUpdates = null;
  boolean anyNewDeletes = false;

  int infosIDX = infos2.size() - 1;
  int delIDX = updates.size() - 1;

  List<SegmentCommitInfo> allDeleted = null;

  while (infosIDX >= 0) {
    // System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX);

    final FrozenBufferedUpdates packet = delIDX >= 0 ? updates.get(delIDX) : null;
    final SegmentCommitInfo info = infos2.get(infosIDX);
    final long segGen = info.getBufferedDeletesGen();

    if (packet != null && segGen < packet.delGen()) {
      // System.out.println("  coalesce");
      if (coalescedUpdates == null) {
        coalescedUpdates = new CoalescedUpdates();
      }
      if (!packet.isSegmentPrivate) {
        /*
         * Only coalesce if we are NOT on a segment private del packet: the segment private del packet
         * must only be applied to segments with the same delGen. Yet, if a segment is already deleted
         * from the SI since it had no more documents remaining after some del packets younger than
         * its segPrivate packet (higher delGen) have been applied, the segPrivate packet has not been
         * removed.
         */
        coalescedUpdates.update(packet);
      }

      delIDX--;
    } else if (packet != null && segGen == packet.delGen()) {
      assert packet.isSegmentPrivate
          : "Packet and Segments deletegen can only match on a segment private del packet gen=" + segGen;
      // System.out.println("  eq");

      // Lock order: IW -> BD -> RP
      assert readerPool.infoIsLive(info);
      final ReadersAndUpdates rld = readerPool.get(info, true);
      final SegmentReader reader = rld.getReader(IOContext.READ);
      int delCount = 0;
      final boolean segAllDeletes;
      try {
        Map<String, NumericFieldUpdates> fieldUpdates = null;
        if (coalescedUpdates != null) {
          // System.out.println("    del coalesced");
          delCount += applyTermDeletes(coalescedUpdates.termsIterable(), rld, reader);
          delCount += applyQueryDeletes(coalescedUpdates.queriesIterable(), rld, reader);
          fieldUpdates =
              applyNumericDocValuesUpdates(coalescedUpdates.numericDVUpdates, rld, reader, fieldUpdates);
        }
        // System.out.println("    del exact");
        // Don't delete by Term here; DocumentsWriterPerThread
        // already did that on flush:
        delCount += applyQueryDeletes(packet.queriesIterable(), rld, reader);
        fieldUpdates = applyNumericDocValuesUpdates(Arrays.asList(packet.updates), rld, reader, fieldUpdates);
        if (!fieldUpdates.isEmpty()) {
          rld.writeFieldUpdates(info.info.dir, fieldUpdates);
        }
        final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
        assert fullDelCount <= rld.info.info.getDocCount();
        segAllDeletes = fullDelCount == rld.info.info.getDocCount();
      } finally {
        rld.release(reader);
        readerPool.release(rld);
      }
      anyNewDeletes |= delCount > 0;

      if (segAllDeletes) {
        if (allDeleted == null) {
          allDeleted = new ArrayList<SegmentCommitInfo>();
        }
        allDeleted.add(info);
      }

      if (infoStream.isEnabled("BD")) {
        infoStream.message("BD", "seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet
            + "]; coalesced deletes=[" + (coalescedUpdates == null ? "null" : coalescedUpdates)
            + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
      }

      if (coalescedUpdates == null) {
        coalescedUpdates = new CoalescedUpdates();
      }

      /*
       * Since we are on a segment private del packet we must not
       * update the coalescedDeletes here! We can simply advance to the
       * next packet and seginfo.
       */
      delIDX--;
      infosIDX--;
      info.setBufferedDeletesGen(gen);

    } else {
      // System.out.println("  gt");

      if (coalescedUpdates != null) {
        // Lock order: IW -> BD -> RP
        assert readerPool.infoIsLive(info);
        final ReadersAndUpdates rld = readerPool.get(info, true);
        final SegmentReader reader = rld.getReader(IOContext.READ);
        int delCount = 0;
        final boolean segAllDeletes;
        try {
          delCount += applyTermDeletes(coalescedUpdates.termsIterable(), rld, reader);
          delCount += applyQueryDeletes(coalescedUpdates.queriesIterable(), rld, reader);
          Map<String, NumericFieldUpdates> fieldUpdates =
              applyNumericDocValuesUpdates(coalescedUpdates.numericDVUpdates, rld, reader, null);
          if (!fieldUpdates.isEmpty()) {
            rld.writeFieldUpdates(info.info.dir, fieldUpdates);
          }
          final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
          assert fullDelCount <= rld.info.info.getDocCount();
          segAllDeletes = fullDelCount == rld.info.info.getDocCount();
        } finally {
          rld.release(reader);
          readerPool.release(rld);
        }
        anyNewDeletes |= delCount > 0;

        if (segAllDeletes) {
          if (allDeleted == null) {
            allDeleted = new ArrayList<SegmentCommitInfo>();
          }
          allDeleted.add(info);
        }

        if (infoStream.isEnabled("BD")) {
          infoStream.message("BD", "seg=" + info + " segGen=" + segGen + " coalesced deletes=["
              + coalescedUpdates + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
        }
      }
      info.setBufferedDeletesGen(gen);

      infosIDX--;
    }
  }

  assert checkDeleteStats();
  if (infoStream.isEnabled("BD")) {
    infoStream.message("BD", "applyDeletes took " + (System.currentTimeMillis() - t0) + " msec");
  }
  // assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any;

  return new ApplyDeletesResult(anyNewDeletes, gen, allDeleted);
}
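/*
 * Simplified, hypothetical sketch (invented names; not the Lucene implementation) of the
 * control flow above: segments and packets are both walked from newest to oldest generation.
 * A packet strictly newer than the current segment is folded into the coalesced set; a packet
 * whose delGen equals the segment's generation is that segment's private flush packet; otherwise
 * only the already-coalesced updates apply and the segment pointer advances.
 */
static void applySketch(long[] segGens /* sorted ascending */, long[] packetGens /* sorted ascending */) {
  int segIdx = segGens.length - 1;
  int pktIdx = packetGens.length - 1;
  while (segIdx >= 0) {
    long segGen = segGens[segIdx];
    if (pktIdx >= 0 && segGen < packetGens[pktIdx]) {
      pktIdx--;                 // packet is newer than this segment: fold it into the coalesced set
    } else if (pktIdx >= 0 && segGen == packetGens[pktIdx]) {
      pktIdx--; segIdx--;       // segment-private packet: apply it (plus coalesced) to just this segment
    } else {
      segIdx--;                 // only the coalesced set applies to this (older) segment
    }
  }
}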
@Test
public void testRollingUpdates() throws Exception {
  Random random = new Random(random().nextLong());
  final BaseDirectoryWrapper dir = newDirectory();
  // test checks for no unref'ed files with the IW helper method, which isn't aware of "tried to delete files"
  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
  }
  final LineFileDocs docs = new LineFileDocs(random, true);

  // provider.register(new MemoryCodec());
  if (random().nextBoolean()) {
    Codec.setDefault(TestUtil.alwaysPostingsFormat(
        new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
  }

  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

  final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
  final int SIZE = atLeast(20);
  int id = 0;
  IndexReader r = null;
  IndexSearcher s = null;
  final int numUpdates =
      (int) (SIZE * (2 + (TEST_NIGHTLY ? 200 * random().nextDouble() : 5 * random().nextDouble())));
  if (VERBOSE) {
    System.out.println("TEST: numUpdates=" + numUpdates);
  }
  int updateCount = 0;

  // TODO: sometimes update ids not in order...
  for (int docIter = 0; docIter < numUpdates; docIter++) {
    final Document doc = docs.nextDoc();
    final String myID = Integer.toString(id);
    if (id == SIZE - 1) {
      id = 0;
    } else {
      id++;
    }
    if (VERBOSE) {
      System.out.println(" docIter=" + docIter + " id=" + id);
    }
    ((Field) doc.getField("docid")).setStringValue(myID);

    Term idTerm = new Term("docid", myID);

    final boolean doUpdate;
    if (s != null && updateCount < SIZE) {
      TopDocs hits = s.search(new TermQuery(idTerm), 1);
      assertEquals(1, hits.totalHits);
      doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc);
      if (VERBOSE) {
        if (doUpdate) {
          System.out.println(" tryDeleteDocument failed");
        } else {
          System.out.println(" tryDeleteDocument succeeded");
        }
      }
    } else {
      doUpdate = true;
      if (VERBOSE) {
        System.out.println(" no searcher: doUpdate=true");
      }
    }

    updateCount++;

    if (doUpdate) {
      if (random().nextBoolean()) {
        w.updateDocument(idTerm, doc);
      } else {
        // It's OK to not be atomic for this test (no separate thread reopening readers):
        w.deleteDocuments(new TermQuery(idTerm));
        w.addDocument(doc);
      }
    } else {
      w.addDocument(doc);
    }

    if (docIter >= SIZE && random().nextInt(50) == 17) {
      if (r != null) {
        r.close();
      }

      final boolean applyDeletions = random().nextBoolean();

      if (VERBOSE) {
        System.out.println("TEST: reopen applyDeletions=" + applyDeletions);
      }

      r = w.getReader(applyDeletions);
      if (applyDeletions) {
        s = newSearcher(r);
      } else {
        s = null;
      }
      assertTrue("applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE,
          !applyDeletions || r.numDocs() == SIZE);
      updateCount = 0;
    }
  }

  if (r != null) {
    r.close();
  }

  w.commit();
  assertEquals(SIZE, w.numDocs());

  w.close();

  TestIndexWriter.assertNoUnreferencedFiles(dir, "leftover files after rolling updates");

  docs.close();

  // LUCENE-4455:
  SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
  long totalBytes = 0;
  for (SegmentCommitInfo sipc : infos) {
    totalBytes += sipc.sizeInBytes();
  }
  long totalBytes2 = 0;
  for (String fileName : dir.listAll()) {
    if (IndexFileNames.CODEC_FILE_PATTERN.matcher(fileName).matches()) {
      totalBytes2 += dir.fileLength(fileName);
    }
  }
  assertEquals(totalBytes2, totalBytes);
  dir.close();
}
private void write(Directory directory) throws IOException {
  long nextGeneration = getNextPendingGeneration();
  String segmentFileName =
      IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, "", nextGeneration);

  // Always advance the generation on write:
  generation = nextGeneration;

  IndexOutput segnOutput = null;
  boolean success = false;

  try {
    segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
    CodecUtil.writeIndexHeader(segnOutput, "segments", VERSION_CURRENT,
        StringHelper.randomId(), Long.toString(nextGeneration, Character.MAX_RADIX));
    segnOutput.writeVInt(Version.LATEST.major);
    segnOutput.writeVInt(Version.LATEST.minor);
    segnOutput.writeVInt(Version.LATEST.bugfix);

    segnOutput.writeLong(version);
    segnOutput.writeInt(counter); // write counter
    segnOutput.writeInt(size());

    if (size() > 0) {
      Version minSegmentVersion = null;

      // We do a separate loop up front so we can write the minSegmentVersion before
      // any SegmentInfo; this makes it cleaner to throw IndexFormatTooOldExc at read time:
      for (SegmentCommitInfo siPerCommit : this) {
        Version segmentVersion = siPerCommit.info.getVersion();
        if (minSegmentVersion == null || segmentVersion.onOrAfter(minSegmentVersion) == false) {
          minSegmentVersion = segmentVersion;
        }
      }

      segnOutput.writeVInt(minSegmentVersion.major);
      segnOutput.writeVInt(minSegmentVersion.minor);
      segnOutput.writeVInt(minSegmentVersion.bugfix);
    }

    // write infos
    for (SegmentCommitInfo siPerCommit : this) {
      SegmentInfo si = siPerCommit.info;
      segnOutput.writeString(si.name);
      byte segmentID[] = si.getId();
      // TODO: remove this in lucene 6, we don't need to include 4.x segments in commits anymore
      if (segmentID == null) {
        segnOutput.writeByte((byte) 0);
      } else {
        if (segmentID.length != StringHelper.ID_LENGTH) {
          throw new IllegalStateException("cannot write segment: invalid id segment=" + si.name
              + "id=" + StringHelper.idToString(segmentID));
        }
        segnOutput.writeByte((byte) 1);
        segnOutput.writeBytes(segmentID, segmentID.length);
      }
      segnOutput.writeString(si.getCodec().getName());
      segnOutput.writeLong(siPerCommit.getDelGen());
      int delCount = siPerCommit.getDelCount();
      if (delCount < 0 || delCount > si.maxDoc()) {
        throw new IllegalStateException("cannot write segment: invalid maxDoc segment=" + si.name
            + " maxDoc=" + si.maxDoc() + " delCount=" + delCount);
      }
      segnOutput.writeInt(delCount);
      segnOutput.writeLong(siPerCommit.getFieldInfosGen());
      segnOutput.writeLong(siPerCommit.getDocValuesGen());
      segnOutput.writeSetOfStrings(siPerCommit.getFieldInfosFiles());
      final Map<Integer, Set<String>> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles();
      segnOutput.writeInt(dvUpdatesFiles.size());
      for (Entry<Integer, Set<String>> e : dvUpdatesFiles.entrySet()) {
        segnOutput.writeInt(e.getKey());
        segnOutput.writeSetOfStrings(e.getValue());
      }
    }
    segnOutput.writeMapOfStrings(userData);
    CodecUtil.writeFooter(segnOutput);
    segnOutput.close();
    directory.sync(Collections.singleton(segmentFileName));
    success = true;
  } finally {
    if (success) {
      pendingCommit = true;
    } else {
      // We hit an exception above; try to close the file
      // but suppress any exception:
      IOUtils.closeWhileHandlingException(segnOutput);
      // Try not to leave a truncated segments_N file in
      // the index:
      IOUtils.deleteFilesIgnoringExceptions(directory, segmentFileName);
    }
  }
}
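// Worked detail of the generation encoding above (example values invented): the generation is
// written into the index-header suffix as a base-36 string via
// Long.toString(nextGeneration, Character.MAX_RADIX), e.g.
//   Long.toString(10, Character.MAX_RADIX) -> "a"
//   Long.toString(42, Character.MAX_RADIX) -> "16"
// so a pending commit at generation 42 carries the suffix "16"; the matching pending_segments_N
// file name presumably uses the same base-36 generation.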
/**
 * Read a particular segmentFileName. Note that this may throw an IOException if a commit is in
 * process.
 *
 * @param directory -- directory containing the segments file
 * @param segmentFileName -- segment file to load
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if there is a low-level IO error
 */
public static final SegmentInfos readCommit(Directory directory, String segmentFileName) throws IOException {

  long generation = generationFromSegmentsFileName(segmentFileName);
  try (ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ)) {
    // NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need
    // to read the magic ourselves.
    int magic = input.readInt();
    if (magic != CodecUtil.CODEC_MAGIC) {
      throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
    }
    // 4.0+
    int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_CURRENT);
    // 5.0+
    byte id[] = null;
    if (format >= VERSION_50) {
      id = new byte[StringHelper.ID_LENGTH];
      input.readBytes(id, 0, id.length);
      CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
    }

    SegmentInfos infos = new SegmentInfos();
    infos.id = id;
    infos.generation = generation;
    infos.lastGeneration = generation;
    if (format >= VERSION_53) {
      // TODO: in the future (7.0? sigh) we can use this to throw IndexFormatTooOldException ... or just rely on the
      // minSegmentLuceneVersion check instead:
      infos.luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
    } else {
      // else compute the min version down below in the for loop
    }

    infos.version = input.readLong();
    infos.counter = input.readInt();
    int numSegments = input.readInt();
    if (numSegments < 0) {
      throw new CorruptIndexException("invalid segment count: " + numSegments, input);
    }

    if (format >= VERSION_53) {
      if (numSegments > 0) {
        infos.minSegmentLuceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
        if (infos.minSegmentLuceneVersion.onOrAfter(Version.LUCENE_4_0_0_ALPHA) == false) {
          throw new IndexFormatTooOldException(input,
              "this index contains a too-old segment (version: " + infos.minSegmentLuceneVersion + ")");
        }
      } else {
        // else leave as null: no segments
      }
    } else {
      // else we recompute it below as we visit segments; it can't be used for throwing IndexFormatTooOldExc, but consumers of
      // SegmentInfos can maybe still use it for other reasons
    }

    long totalDocs = 0;
    for (int seg = 0; seg < numSegments; seg++) {
      String segName = input.readString();
      final byte segmentID[];
      if (format >= VERSION_50) {
        byte hasID = input.readByte();
        if (hasID == 1) {
          segmentID = new byte[StringHelper.ID_LENGTH];
          input.readBytes(segmentID, 0, segmentID.length);
        } else if (hasID == 0) {
          segmentID = null; // 4.x segment, doesn't have an ID
        } else {
          throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input);
        }
      } else {
        segmentID = null;
      }
      Codec codec = readCodec(input, format < VERSION_53);
      SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ);
      info.setCodec(codec);
      totalDocs += info.maxDoc();
      long delGen = input.readLong();
      int delCount = input.readInt();
      if (delCount < 0 || delCount > info.maxDoc()) {
        throw new CorruptIndexException(
            "invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input);
      }
      long fieldInfosGen = -1;
      if (format >= VERSION_46) {
        fieldInfosGen = input.readLong();
      }
      long dvGen = -1;
      if (format >= VERSION_49) {
        dvGen = input.readLong();
      } else {
        dvGen = fieldInfosGen;
      }
      SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen);
      if (format >= VERSION_46) {
        if (format < VERSION_49) {
          // Recorded per-generation files, which were buggy (see
          // LUCENE-5636). We need to read and keep them so we continue to
          // reference those files. Unfortunately it means that the files will
          // be referenced even if the fields are updated again, until the
          // segment is merged.
          final int numGensUpdatesFiles = input.readInt();
          final Map<Long, Set<String>> genUpdatesFiles;
          if (numGensUpdatesFiles == 0) {
            genUpdatesFiles = Collections.emptyMap();
          } else {
            genUpdatesFiles = new HashMap<>(numGensUpdatesFiles);
            for (int i = 0; i < numGensUpdatesFiles; i++) {
              genUpdatesFiles.put(input.readLong(), input.readStringSet());
            }
          }
          siPerCommit.setGenUpdatesFiles(genUpdatesFiles);
        } else {
          if (format >= VERSION_51) {
            siPerCommit.setFieldInfosFiles(input.readSetOfStrings());
          } else {
            siPerCommit.setFieldInfosFiles(Collections.unmodifiableSet(input.readStringSet()));
          }
          final Map<Integer, Set<String>> dvUpdateFiles;
          final int numDVFields = input.readInt();
          if (numDVFields == 0) {
            dvUpdateFiles = Collections.emptyMap();
          } else {
            Map<Integer, Set<String>> map = new HashMap<>(numDVFields);
            for (int i = 0; i < numDVFields; i++) {
              if (format >= VERSION_51) {
                map.put(input.readInt(), input.readSetOfStrings());
              } else {
                map.put(input.readInt(), Collections.unmodifiableSet(input.readStringSet()));
              }
            }
            dvUpdateFiles = Collections.unmodifiableMap(map);
          }
          siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles);
        }
      }
      infos.add(siPerCommit);

      Version segmentVersion = info.getVersion();
      if (format < VERSION_53) {
        if (infos.minSegmentLuceneVersion == null || segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
          infos.minSegmentLuceneVersion = segmentVersion;
        }
      } else if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
        throw new CorruptIndexException("segments file recorded minSegmentLuceneVersion=" + infos.minSegmentLuceneVersion
            + " but segment=" + info + " has older version=" + segmentVersion, input);
      }
    }

    if (format >= VERSION_51) {
      infos.userData = input.readMapOfStrings();
    } else {
      infos.userData = Collections.unmodifiableMap(input.readStringStringMap());
    }

    if (format >= VERSION_48) {
      CodecUtil.checkFooter(input);
    } else {
      final long checksumNow = input.getChecksum();
      final long checksumThen = input.readLong();
      if (checksumNow != checksumThen) {
        throw new CorruptIndexException("checksum failed (hardware problem?) : expected="
            + Long.toHexString(checksumThen) + " actual=" + Long.toHexString(checksumNow), input);
      }
      CodecUtil.checkEOF(input);
    }

    // LUCENE-6299: check we are in bounds
    if (totalDocs > IndexWriter.getActualMaxDocs()) {
      throw new CorruptIndexException("Too many documents: an index cannot exceed "
          + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + totalDocs, input);
    }

    return infos;
  }
}
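/*
 * Hypothetical usage sketch (method name, index path and segments file name invented): reading
 * one specific commit point and printing per-segment delete stats. FSDirectory.open, Paths.get
 * and the SegmentCommitInfo accessors used here are assumed to come from the same Lucene version
 * as readCommit above.
 */
static void dumpCommit() throws IOException {
  try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"))) {
    SegmentInfos infos = SegmentInfos.readCommit(dir, "segments_2");
    for (SegmentCommitInfo sci : infos) {
      System.out.println(sci.info.name + " delCount=" + sci.getDelCount() + " delGen=" + sci.getDelGen());
    }
  }
}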
/** Expert: scores one merge; subclasses can override. */
protected MergeScore score(List<SegmentCommitInfo> candidate, boolean hitTooLarge, long mergingBytes, IndexWriter writer) throws IOException {
  long totBeforeMergeBytes = 0;
  long totAfterMergeBytes = 0;
  long totAfterMergeBytesFloored = 0;
  for (SegmentCommitInfo info : candidate) {
    final long segBytes = size(info, writer);
    totAfterMergeBytes += segBytes;
    totAfterMergeBytesFloored += floorSize(segBytes);
    totBeforeMergeBytes += info.sizeInBytes();
  }

  // Roughly measure "skew" of the merge, i.e. how
  // "balanced" the merge is (whether the segments are
  // about the same size), which can range from
  // 1.0/numSegsBeingMerged (good) to 1.0 (poor). Heavily
  // lopsided merges (skew near 1.0) are no good; they mean
  // O(N^2) merge cost over time:
  final double skew;
  if (hitTooLarge) {
    // Pretend the merge has perfect skew; skew doesn't
    // matter in this case because this merge will not
    // "cascade" and so it cannot lead to N^2 merge cost
    // over time:
    skew = 1.0 / maxMergeAtOnce;
  } else {
    skew = ((double) floorSize(size(candidate.get(0), writer))) / totAfterMergeBytesFloored;
  }

  // Strongly favor merges with less skew (smaller
  // mergeScore is better):
  double mergeScore = skew;

  // Gently favor smaller merges over bigger ones. We
  // don't want to make this exponent too large else we
  // can end up doing poor merges of small segments in
  // order to avoid the large merges:
  mergeScore *= Math.pow(totAfterMergeBytes, 0.05);

  // Strongly favor merges that reclaim deletes:
  final double nonDelRatio = ((double) totAfterMergeBytes) / totBeforeMergeBytes;
  mergeScore *= Math.pow(nonDelRatio, reclaimDeletesWeight);

  final double finalMergeScore = mergeScore;

  return new MergeScore() {

    @Override
    public double getScore() {
      return finalMergeScore;
    }

    @Override
    public String getExplanation() {
      return "skew=" + String.format(Locale.ROOT, "%.3f", skew)
          + " nonDelRatio=" + String.format(Locale.ROOT, "%.3f", nonDelRatio);
    }
  };
}
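// Worked example of the scoring above (all numbers invented; assumes the candidate list is
// sorted largest-first, every segment is above the floor size, and reclaimDeletesWeight = 2.0):
//   pro-rated segment sizes: 50 MB, 30 MB, 20 MB; on-disk size before merge: 120 MB
//   totAfterMergeBytes = 100 MB,  totBeforeMergeBytes = 120 MB
//   skew         = 50 MB / 100 MB            = 0.5    (largest segment dominates)
//   size factor  = (100e6)^0.05              ~= 2.51
//   nonDelRatio  = 100 / 120                  = 0.833
//   mergeScore   ~= 0.5 * 2.51 * 0.833^2     ~= 0.87
// Lower scores are better, so a more balanced candidate or one reclaiming more deletes would win.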