/**
 * Produces a single merge covering the segments at positions {@code [start, start + length)} of
 * {@code segmentInfos} when {@code doMerge} is set, then clears {@code doMerge}.
 *
 * @return a specification containing that one merge, or {@code null} when {@code doMerge} is
 *     not set
 */
@Override
 public MergeSpecification findMerges(
     MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer)
     throws IOException {
   if (!doMerge) {
     // Nothing was requested: report "no merges".
     return null;
   }
   final MergeSpecification spec = new MergeSpecification();
   spec.add(new OneMerge(segmentInfos.asList().subList(start, start + length)));
   // The flag is consumed only after the merge was built successfully.
   doMerge = false;
   return spec;
 }
Esempio n. 2
0
 /**
  * Swaps the segments of this instance for those of {@code other} and adopts {@code other}'s
  * {@code lastGeneration}. Generation, version and counter are not assigned here, so that future
  * commits remain write once.
  *
  * @param other the instance whose segment list and {@code lastGeneration} are taken over
  */
 void replace(SegmentInfos other) {
   // Install the other instance's segments first, then take over its last generation.
   this.rollbackSegmentInfos(other.asList());
   this.lastGeneration = other.lastGeneration;
 }
  /**
   * Picks the merges to run for the given segments.
   *
   * <p>The segments are copied and sorted with {@code SegmentByteSizeDescending}. Leading segments
   * whose size is at least half of {@code maxMergedSegmentBytes} are left out of both the byte
   * budget and merge selection. An allowed segment count is derived from the remaining bytes,
   * {@code segsPerTier} and {@code maxMergeAtOnce}. While the number of eligible segments (not
   * already merging, not already picked by this call) exceeds that count, the candidate window
   * with the lowest score is added to the returned specification.
   *
   * @param mergeTrigger the event that caused this call; not consulted by this method
   * @param infos the segments to choose merges from
   * @param writer passed to the size, score and verbose-message helpers, and asked for the
   *     segments that are already merging
   * @return the selected merges, or {@code null} when there are no segments or no merge was
   *     selected
   * @throws IOException declared by the signature; presumably raised by the size/score helpers
   *     (confirm against their declarations)
   */
  @Override
  public MergeSpecification findMerges(
      MergeTrigger mergeTrigger, SegmentInfos infos, IndexWriter writer) throws IOException {
    if (verbose(writer)) {
      message("findMerges: " + infos.size() + " segments", writer);
    }
    if (infos.size() == 0) {
      return null;
    }
    final Collection<SegmentCommitInfo> merging = writer.getMergingSegments();
    final Collection<SegmentCommitInfo> toBeMerged = new HashSet<>();

    final List<SegmentCommitInfo> infosSorted = new ArrayList<>(infos.asList());
    Collections.sort(infosSorted, new SegmentByteSizeDescending(writer));

    // Compute total index bytes & print details about the index
    long totIndexBytes = 0;
    long minSegmentBytes = Long.MAX_VALUE;
    for (SegmentCommitInfo info : infosSorted) {
      final long segBytes = size(info, writer);
      if (verbose(writer)) {
        String extra = merging.contains(info) ? " [merging]" : "";
        if (segBytes >= maxMergedSegmentBytes / 2.0) {
          extra += " [skip: too large]";
        } else if (segBytes < floorSegmentBytes) {
          extra += " [floored]";
        }
        message(
            "  seg="
                + writer.segString(info)
                + " size="
                // Divide in floating point from the start: an integer "/ 1024" would truncate
                // the size to whole KB before the MB conversion, unlike the other size messages.
                + String.format(Locale.ROOT, "%.3f", segBytes / 1024. / 1024.)
                + " MB"
                + extra,
            writer);
      }

      minSegmentBytes = Math.min(segBytes, minSegmentBytes);
      // Accum total byte size
      totIndexBytes += segBytes;
    }

    // If we have too-large segments, grace them out
    // of the maxSegmentCount. This scan stops at the first segment below the threshold, so it
    // relies on infosSorted being ordered largest-first (as the comparator's name indicates).
    int tooBigCount = 0;
    while (tooBigCount < infosSorted.size()) {
      long segBytes = size(infosSorted.get(tooBigCount), writer);
      if (segBytes < maxMergedSegmentBytes / 2.0) {
        break;
      }
      totIndexBytes -= segBytes;
      tooBigCount++;
    }

    minSegmentBytes = floorSize(minSegmentBytes);

    // Compute max allowed segs in the index: each level may hold segsPerTier segments of
    // levelSize bytes, and each following level is maxMergeAtOnce times larger.
    long levelSize = minSegmentBytes;
    long bytesLeft = totIndexBytes;
    double allowedSegCount = 0;
    while (true) {
      final double segCountLevel = bytesLeft / (double) levelSize;
      if (segCountLevel < segsPerTier) {
        // Last (partially filled) level.
        allowedSegCount += Math.ceil(segCountLevel);
        break;
      }
      allowedSegCount += segsPerTier;
      bytesLeft -= segsPerTier * levelSize;
      levelSize *= maxMergeAtOnce;
    }
    int allowedSegCountInt = (int) allowedSegCount;

    MergeSpecification spec = null;

    // Cycle to possibly select more than one merge:
    while (true) {

      long mergingBytes = 0;

      // Gather eligible segments for merging, ie segments
      // not already being merged and not already picked (by
      // prior iteration of this loop) for merging:
      final List<SegmentCommitInfo> eligible = new ArrayList<>();
      for (int idx = tooBigCount; idx < infosSorted.size(); idx++) {
        final SegmentCommitInfo info = infosSorted.get(idx);
        if (merging.contains(info)) {
          mergingBytes += size(info, writer);
        } else if (!toBeMerged.contains(info)) {
          eligible.add(info);
        }
      }

      final boolean maxMergeIsRunning = mergingBytes >= maxMergedSegmentBytes;

      if (verbose(writer)) {
        message(
            "  allowedSegmentCount="
                + allowedSegCountInt
                + " vs count="
                + infosSorted.size()
                + " (eligible count="
                + eligible.size()
                + ") tooBigCount="
                + tooBigCount,
            writer);
      }

      if (eligible.isEmpty()) {
        return spec;
      }

      if (eligible.size() <= allowedSegCountInt) {
        // Within budget: nothing (more) to merge.
        return spec;
      }

      // OK we are over budget -- find best merge!
      MergeScore bestScore = null;
      List<SegmentCommitInfo> best = null;
      boolean bestTooLarge = false;
      long bestMergeBytes = 0;

      // Consider all merge starts:
      for (int startIdx = 0; startIdx <= eligible.size() - maxMergeAtOnce; startIdx++) {

        long totAfterMergeBytes = 0;

        final List<SegmentCommitInfo> candidate = new ArrayList<>();
        boolean hitTooLarge = false;
        for (int idx = startIdx;
            idx < eligible.size() && candidate.size() < maxMergeAtOnce;
            idx++) {
          final SegmentCommitInfo info = eligible.get(idx);
          final long segBytes = size(info, writer);

          if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) {
            hitTooLarge = true;
            // NOTE: we continue, so that we can try
            // "packing" smaller segments into this merge
            // to see if we can get closer to the max
            // size; this in general is not perfect since
            // this is really "bin packing" and we'd have
            // to try different permutations.
            continue;
          }
          candidate.add(info);
          totAfterMergeBytes += segBytes;
        }

        // We should never see an empty candidate: we iterated over maxMergeAtOnce
        // segments, and already pre-excluded the too-large segments:
        assert candidate.size() > 0;

        final MergeScore score = score(candidate, hitTooLarge, mergingBytes, writer);
        if (verbose(writer)) {
          message(
              "  maybe="
                  + writer.segString(candidate)
                  + " score="
                  + score.getScore()
                  + " "
                  + score.getExplanation()
                  + " tooLarge="
                  + hitTooLarge
                  + " size="
                  + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes / 1024. / 1024.),
              writer);
        }

        // If we are already running a max sized merge
        // (maxMergeIsRunning), don't allow another max
        // sized merge to kick off. Lower scores win.
        if ((bestScore == null || score.getScore() < bestScore.getScore())
            && (!hitTooLarge || !maxMergeIsRunning)) {
          best = candidate;
          bestScore = score;
          bestTooLarge = hitTooLarge;
          bestMergeBytes = totAfterMergeBytes;
        }
      }

      if (best == null) {
        // No acceptable candidate in this round.
        return spec;
      }

      if (spec == null) {
        spec = new MergeSpecification();
      }
      final OneMerge merge = new OneMerge(best);
      spec.add(merge);
      // Remember the picked segments so the next round does not offer them again.
      for (SegmentCommitInfo info : merge.segments) {
        toBeMerged.add(info);
      }

      if (verbose(writer)) {
        message(
            "  add merge="
                + writer.segString(merge.segments)
                + " size="
                + String.format(Locale.ROOT, "%.3f MB", bestMergeBytes / 1024. / 1024.)
                + " score="
                + String.format(Locale.ROOT, "%.3f", bestScore.getScore())
                + " "
                + bestScore.getExplanation()
                + (bestTooLarge ? " [max merge]" : ""),
            writer);
      }
    }
  }