/**
 * Hands out a single merge while the {@code doMerge} flag is set.
 *
 * <p>The merge covers the sub-list of {@code segmentInfos} that begins at index {@code start} and
 * spans {@code length} entries. The flag is cleared before returning, so a subsequent call returns
 * {@code null} until the flag is raised again.
 *
 * @param mergeTrigger not consulted by this implementation
 * @param segmentInfos the segments from which the sub-list to merge is taken
 * @param writer not consulted by this implementation
 * @return a specification holding exactly one merge, or {@code null} when {@code doMerge} is unset
 */
@Override
public MergeSpecification findMerges(
    MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
  final MergeSpecification spec = new MergeSpecification();
  if (!doMerge) {
    // Nothing requested: report "no merges".
    return null;
  }

  final OneMerge merge = new OneMerge(segmentInfos.asList().subList(start, start + length));
  spec.add(merge);
  // One-shot: the request has now been consumed.
  doMerge = false;
  return spec;
}
/**
 * Replaces the segments held by this instance with those of {@code other}.
 *
 * <p>The segment list is swapped via {@code rollbackSegmentInfos}, and {@code lastGeneration} is
 * copied from {@code other}. This method does not itself assign generation, version or counter;
 * they are meant to be kept so that future commits remain write once.
 *
 * @param other the instance whose segment list and {@code lastGeneration} are adopted
 */
void replace(SegmentInfos other) {
  this.rollbackSegmentInfos(other.asList());
  this.lastGeneration = other.lastGeneration;
}
@Override public MergeSpecification findMerges( MergeTrigger mergeTrigger, SegmentInfos infos, IndexWriter writer) throws IOException { if (verbose(writer)) { message("findMerges: " + infos.size() + " segments", writer); } if (infos.size() == 0) { return null; } final Collection<SegmentCommitInfo> merging = writer.getMergingSegments(); final Collection<SegmentCommitInfo> toBeMerged = new HashSet<>(); final List<SegmentCommitInfo> infosSorted = new ArrayList<>(infos.asList()); Collections.sort(infosSorted, new SegmentByteSizeDescending(writer)); // Compute total index bytes & print details about the index long totIndexBytes = 0; long minSegmentBytes = Long.MAX_VALUE; for (SegmentCommitInfo info : infosSorted) { final long segBytes = size(info, writer); if (verbose(writer)) { String extra = merging.contains(info) ? " [merging]" : ""; if (segBytes >= maxMergedSegmentBytes / 2.0) { extra += " [skip: too large]"; } else if (segBytes < floorSegmentBytes) { extra += " [floored]"; } message( " seg=" + writer.segString(info) + " size=" + String.format(Locale.ROOT, "%.3f", segBytes / 1024 / 1024.) 
+ " MB" + extra, writer); } minSegmentBytes = Math.min(segBytes, minSegmentBytes); // Accum total byte size totIndexBytes += segBytes; } // If we have too-large segments, grace them out // of the maxSegmentCount: int tooBigCount = 0; while (tooBigCount < infosSorted.size()) { long segBytes = size(infosSorted.get(tooBigCount), writer); if (segBytes < maxMergedSegmentBytes / 2.0) { break; } totIndexBytes -= segBytes; tooBigCount++; } minSegmentBytes = floorSize(minSegmentBytes); // Compute max allowed segs in the index long levelSize = minSegmentBytes; long bytesLeft = totIndexBytes; double allowedSegCount = 0; while (true) { final double segCountLevel = bytesLeft / (double) levelSize; if (segCountLevel < segsPerTier) { allowedSegCount += Math.ceil(segCountLevel); break; } allowedSegCount += segsPerTier; bytesLeft -= segsPerTier * levelSize; levelSize *= maxMergeAtOnce; } int allowedSegCountInt = (int) allowedSegCount; MergeSpecification spec = null; // Cycle to possibly select more than one merge: while (true) { long mergingBytes = 0; // Gather eligible segments for merging, ie segments // not already being merged and not already picked (by // prior iteration of this loop) for merging: final List<SegmentCommitInfo> eligible = new ArrayList<>(); for (int idx = tooBigCount; idx < infosSorted.size(); idx++) { final SegmentCommitInfo info = infosSorted.get(idx); if (merging.contains(info)) { mergingBytes += size(info, writer); } else if (!toBeMerged.contains(info)) { eligible.add(info); } } final boolean maxMergeIsRunning = mergingBytes >= maxMergedSegmentBytes; if (verbose(writer)) { message( " allowedSegmentCount=" + allowedSegCountInt + " vs count=" + infosSorted.size() + " (eligible count=" + eligible.size() + ") tooBigCount=" + tooBigCount, writer); } if (eligible.size() == 0) { return spec; } if (eligible.size() > allowedSegCountInt) { // OK we are over budget -- find best merge! 
MergeScore bestScore = null; List<SegmentCommitInfo> best = null; boolean bestTooLarge = false; long bestMergeBytes = 0; // Consider all merge starts: for (int startIdx = 0; startIdx <= eligible.size() - maxMergeAtOnce; startIdx++) { long totAfterMergeBytes = 0; final List<SegmentCommitInfo> candidate = new ArrayList<>(); boolean hitTooLarge = false; for (int idx = startIdx; idx < eligible.size() && candidate.size() < maxMergeAtOnce; idx++) { final SegmentCommitInfo info = eligible.get(idx); final long segBytes = size(info, writer); if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) { hitTooLarge = true; // NOTE: we continue, so that we can try // "packing" smaller segments into this merge // to see if we can get closer to the max // size; this in general is not perfect since // this is really "bin packing" and we'd have // to try different permutations. continue; } candidate.add(info); totAfterMergeBytes += segBytes; } // We should never see an empty candidate: we iterated over maxMergeAtOnce // segments, and already pre-excluded the too-large segments: assert candidate.size() > 0; final MergeScore score = score(candidate, hitTooLarge, mergingBytes, writer); if (verbose(writer)) { message( " maybe=" + writer.segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes / 1024. 
/ 1024.), writer); } // If we are already running a max sized merge // (maxMergeIsRunning), don't allow another max // sized merge to kick off: if ((bestScore == null || score.getScore() < bestScore.getScore()) && (!hitTooLarge || !maxMergeIsRunning)) { best = candidate; bestScore = score; bestTooLarge = hitTooLarge; bestMergeBytes = totAfterMergeBytes; } } if (best != null) { if (spec == null) { spec = new MergeSpecification(); } final OneMerge merge = new OneMerge(best); spec.add(merge); for (SegmentCommitInfo info : merge.segments) { toBeMerged.add(info); } if (verbose(writer)) { message( " add merge=" + writer.segString(merge.segments) + " size=" + String.format(Locale.ROOT, "%.3f MB", bestMergeBytes / 1024. / 1024.) + " score=" + String.format(Locale.ROOT, "%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : ""), writer); } } else { return spec; } } else { return spec; } } }