@Override public MergeSpecification findForcedDeletesMerges(SegmentInfos infos, IndexWriter writer) throws IOException { if (verbose(writer)) { message( "findForcedDeletesMerges infos=" + writer.segString(infos) + " forceMergeDeletesPctAllowed=" + forceMergeDeletesPctAllowed, writer); } final List<SegmentCommitInfo> eligible = new ArrayList<>(); final Collection<SegmentCommitInfo> merging = writer.getMergingSegments(); for (SegmentCommitInfo info : infos) { double pctDeletes = 100. * ((double) writer.numDeletedDocs(info)) / info.info.getDocCount(); if (pctDeletes > forceMergeDeletesPctAllowed && !merging.contains(info)) { eligible.add(info); } } if (eligible.size() == 0) { return null; } Collections.sort(eligible, new SegmentByteSizeDescending(writer)); if (verbose(writer)) { message("eligible=" + eligible, writer); } int start = 0; MergeSpecification spec = null; while (start < eligible.size()) { // Don't enforce max merged size here: app is explicitly // calling forceMergeDeletes, and knows this may take a // long time / produce big segments (like forceMerge): final int end = Math.min(start + maxMergeAtOnceExplicit, eligible.size()); if (spec == null) { spec = new MergeSpecification(); } final OneMerge merge = new OneMerge(eligible.subList(start, end)); if (verbose(writer)) { message("add merge=" + writer.segString(merge.segments), writer); } spec.add(merge); start = end; } return spec; }
@Override
public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
  // One-shot trigger: when doMerge is set, return a single merge covering
  // segments [start, start + length) and clear the flag; otherwise report
  // "nothing to merge" with null.
  if (doMerge) {
    // Allocate the spec only on this path; the original allocated it
    // unconditionally and then discarded it when returning null.
    MergeSpecification ms = new MergeSpecification();
    ms.add(new OneMerge(segmentInfos.asList().subList(start, start + length)));
    doMerge = false;
    return ms;
  }
  return null;
}
@Override
public MergeSpecification findForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount, Map<SegmentInfo, Boolean> segmentsToMerge) throws CorruptIndexException, IOException {
  // Gather the segments the caller explicitly asked to merge.
  // ArrayList replaces Vector: no per-call synchronization is needed here.
  List<SegmentInfo> activeSegmentsToOptimize = new ArrayList<>();
  for (SegmentInfo segmentInfo : segmentInfos) {
    // Boolean.TRUE.equals guards against a missing map entry: the original
    // auto-unboxed segmentsToMerge.get(...), which throws NPE for any
    // segment not present in the map.
    if (Boolean.TRUE.equals(segmentsToMerge.get(segmentInfo))) {
      activeSegmentsToOptimize.add(segmentInfo);
    }
  }
  // Return a (possibly empty) spec; an empty one carries no merges.
  MergeSpecification mergeSpec = new MergeSpecification();
  if (!activeSegmentsToOptimize.isEmpty()) {
    mergeSpec.add(new OneMerge(activeSegmentsToOptimize));
  }
  return mergeSpec;
}
@Override public MergeSpecification findForcedMerges( SegmentInfos infos, int maxSegmentCount, Map<SegmentCommitInfo, Boolean> segmentsToMerge, IndexWriter writer) throws IOException { if (verbose(writer)) { message( "findForcedMerges maxSegmentCount=" + maxSegmentCount + " infos=" + writer.segString(infos) + " segmentsToMerge=" + segmentsToMerge, writer); } List<SegmentCommitInfo> eligible = new ArrayList<>(); boolean forceMergeRunning = false; final Collection<SegmentCommitInfo> merging = writer.getMergingSegments(); boolean segmentIsOriginal = false; for (SegmentCommitInfo info : infos) { final Boolean isOriginal = segmentsToMerge.get(info); if (isOriginal != null) { segmentIsOriginal = isOriginal; if (!merging.contains(info)) { eligible.add(info); } else { forceMergeRunning = true; } } } if (eligible.size() == 0) { return null; } if ((maxSegmentCount > 1 && eligible.size() <= maxSegmentCount) || (maxSegmentCount == 1 && eligible.size() == 1 && (!segmentIsOriginal || isMerged(infos, eligible.get(0), writer)))) { if (verbose(writer)) { message("already merged", writer); } return null; } Collections.sort(eligible, new SegmentByteSizeDescending(writer)); if (verbose(writer)) { message("eligible=" + eligible, writer); message("forceMergeRunning=" + forceMergeRunning, writer); } int end = eligible.size(); MergeSpecification spec = null; // Do full merges, first, backwards: while (end >= maxMergeAtOnceExplicit + maxSegmentCount - 1) { if (spec == null) { spec = new MergeSpecification(); } final OneMerge merge = new OneMerge(eligible.subList(end - maxMergeAtOnceExplicit, end)); if (verbose(writer)) { message("add merge=" + writer.segString(merge.segments), writer); } spec.add(merge); end -= maxMergeAtOnceExplicit; } if (spec == null && !forceMergeRunning) { // Do final merge final int numToMerge = end - maxSegmentCount + 1; final OneMerge merge = new OneMerge(eligible.subList(end - numToMerge, end)); if (verbose(writer)) { message("add final merge=" + 
merge.segString(), writer); } spec = new MergeSpecification(); spec.add(merge); } return spec; }
/**
 * Finds natural merges: computes the per-tier segment budget for the index and,
 * while the eligible segment count exceeds it, scores every contiguous window of
 * up to maxMergeAtOnce segments (sorted by descending byte size) and picks the
 * best-scoring one. Segments at/above half of maxMergedSegmentBytes are excluded
 * up front; oversized candidates are handled via "bin packing" (see below).
 * Returns null when there is nothing to do, otherwise a spec of one or more merges.
 */
@Override
public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos infos, IndexWriter writer) throws IOException {
  if (verbose(writer)) {
    message("findMerges: " + infos.size() + " segments", writer);
  }
  if (infos.size() == 0) {
    return null;
  }
  final Collection<SegmentCommitInfo> merging = writer.getMergingSegments();
  // Segments selected by an earlier iteration of the selection loop below;
  // they must not be picked twice within this call.
  final Collection<SegmentCommitInfo> toBeMerged = new HashSet<>();
  final List<SegmentCommitInfo> infosSorted = new ArrayList<>(infos.asList());
  Collections.sort(infosSorted, new SegmentByteSizeDescending(writer));
  // Compute total index bytes & print details about the index
  long totIndexBytes = 0;
  long minSegmentBytes = Long.MAX_VALUE;
  for (SegmentCommitInfo info : infosSorted) {
    final long segBytes = size(info, writer);
    if (verbose(writer)) {
      String extra = merging.contains(info) ? " [merging]" : "";
      if (segBytes >= maxMergedSegmentBytes / 2.0) {
        extra += " [skip: too large]";
      } else if (segBytes < floorSegmentBytes) {
        extra += " [floored]";
      }
      message(" seg=" + writer.segString(info) + " size=" + String.format(Locale.ROOT, "%.3f", segBytes / 1024 / 1024.) + " MB" + extra, writer);
    }
    minSegmentBytes = Math.min(segBytes, minSegmentBytes);
    // Accum total byte size
    totIndexBytes += segBytes;
  }
  // If we have too-large segments, grace them out
  // of the maxSegmentCount:
  // (infosSorted is size-descending, so the too-large segments form a prefix.)
  int tooBigCount = 0;
  while (tooBigCount < infosSorted.size()) {
    long segBytes = size(infosSorted.get(tooBigCount), writer);
    if (segBytes < maxMergedSegmentBytes / 2.0) {
      break;
    }
    totIndexBytes -= segBytes;
    tooBigCount++;
  }
  minSegmentBytes = floorSize(minSegmentBytes);
  // Compute max allowed segs in the index
  // Walk up the size tiers: each tier contributes up to segsPerTier segments,
  // with per-segment size growing by maxMergeAtOnce per tier.
  long levelSize = minSegmentBytes;
  long bytesLeft = totIndexBytes;
  double allowedSegCount = 0;
  while (true) {
    final double segCountLevel = bytesLeft / (double) levelSize;
    if (segCountLevel < segsPerTier) {
      allowedSegCount += Math.ceil(segCountLevel);
      break;
    }
    allowedSegCount += segsPerTier;
    bytesLeft -= segsPerTier * levelSize;
    levelSize *= maxMergeAtOnce;
  }
  int allowedSegCountInt = (int) allowedSegCount;
  MergeSpecification spec = null;
  // Cycle to possibly select more than one merge:
  while (true) {
    long mergingBytes = 0;
    // Gather eligible segments for merging, ie segments
    // not already being merged and not already picked (by
    // prior iteration of this loop) for merging:
    final List<SegmentCommitInfo> eligible = new ArrayList<>();
    for (int idx = tooBigCount; idx < infosSorted.size(); idx++) {
      final SegmentCommitInfo info = infosSorted.get(idx);
      if (merging.contains(info)) {
        mergingBytes += size(info, writer);
      } else if (!toBeMerged.contains(info)) {
        eligible.add(info);
      }
    }
    final boolean maxMergeIsRunning = mergingBytes >= maxMergedSegmentBytes;
    if (verbose(writer)) {
      message(" allowedSegmentCount=" + allowedSegCountInt + " vs count=" + infosSorted.size() + " (eligible count=" + eligible.size() + ") tooBigCount=" + tooBigCount, writer);
    }
    if (eligible.size() == 0) {
      return spec;
    }
    if (eligible.size() > allowedSegCountInt) {
      // OK we are over budget -- find best merge!
      MergeScore bestScore = null;
      List<SegmentCommitInfo> best = null;
      boolean bestTooLarge = false;
      long bestMergeBytes = 0;
      // Consider all merge starts:
      for (int startIdx = 0; startIdx <= eligible.size() - maxMergeAtOnce; startIdx++) {
        long totAfterMergeBytes = 0;
        final List<SegmentCommitInfo> candidate = new ArrayList<>();
        boolean hitTooLarge = false;
        for (int idx = startIdx; idx < eligible.size() && candidate.size() < maxMergeAtOnce; idx++) {
          final SegmentCommitInfo info = eligible.get(idx);
          final long segBytes = size(info, writer);
          if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) {
            hitTooLarge = true;
            // NOTE: we continue, so that we can try
            // "packing" smaller segments into this merge
            // to see if we can get closer to the max
            // size; this in general is not perfect since
            // this is really "bin packing" and we'd have
            // to try different permutations.
            continue;
          }
          candidate.add(info);
          totAfterMergeBytes += segBytes;
        }
        // We should never see an empty candidate: we iterated over maxMergeAtOnce
        // segments, and already pre-excluded the too-large segments:
        assert candidate.size() > 0;
        final MergeScore score = score(candidate, hitTooLarge, mergingBytes, writer);
        if (verbose(writer)) {
          message(" maybe=" + writer.segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes / 1024. / 1024.), writer);
        }
        // If we are already running a max sized merge
        // (maxMergeIsRunning), don't allow another max
        // sized merge to kick off:
        // NOTE(review): lower score is better here — see score(...).
        if ((bestScore == null || score.getScore() < bestScore.getScore()) && (!hitTooLarge || !maxMergeIsRunning)) {
          best = candidate;
          bestScore = score;
          bestTooLarge = hitTooLarge;
          bestMergeBytes = totAfterMergeBytes;
        }
      }
      if (best != null) {
        if (spec == null) {
          spec = new MergeSpecification();
        }
        final OneMerge merge = new OneMerge(best);
        spec.add(merge);
        // Mark the chosen segments so the next loop iteration skips them.
        for (SegmentCommitInfo info : merge.segments) {
          toBeMerged.add(info);
        }
        if (verbose(writer)) {
          message(" add merge=" + writer.segString(merge.segments) + " size=" + String.format(Locale.ROOT, "%.3f MB", bestMergeBytes / 1024. / 1024.) + " score=" + String.format(Locale.ROOT, "%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : ""), writer);
        }
      } else {
        // No selectable merge (e.g. all windows hit the max-merge guard).
        return spec;
      }
    } else {
      // Within budget: stop selecting.
      return spec;
    }
  }
}