/**
 * Creates and returns a new {@link MergeThread} for the given merge. The thread is created as a
 * daemon with a unique, human-readable name, but is not started by this method.
 */
protected synchronized MergeThread getMergeThread(IndexWriter writer, OneMerge merge)
    throws IOException {
  final MergeThread newMergeThread = new MergeThread(writer, merge);
  // Daemon so a stuck merge never prevents JVM exit:
  newMergeThread.setDaemon(true);
  newMergeThread.setName("Lucene Merge Thread #" + mergeThreadCount++);
  return newMergeThread;
}
/** * Wait for any running merge threads to finish. This call is not interruptible as used by {@link * #close()}. */ public void sync() { boolean interrupted = false; try { while (true) { MergeThread toSync = null; synchronized (this) { for (MergeThread t : mergeThreads) { if (t.isAlive()) { toSync = t; break; } } } if (toSync != null) { try { toSync.join(); } catch (InterruptedException ie) { // ignore this Exception, we will retry until all threads are dead interrupted = true; } } else { break; } } } finally { // finally, restore interrupt status: if (interrupted) Thread.currentThread().interrupt(); } }
/**
 * Returns the number of merge threads that are alive, ignoring the calling thread if it is a
 * merge thread. Note that this number is &le; {@link #mergeThreads} size.
 *
 * @lucene.internal
 */
public synchronized int mergeThreadCount() {
  final Thread self = Thread.currentThread();
  int alive = 0;
  for (MergeThread mergeThread : mergeThreads) {
    if (mergeThread == self) {
      // Don't count ourselves when called from a merge thread.
      continue;
    }
    // Count only threads still running a non-aborted merge:
    if (mergeThread.isAlive() && !mergeThread.merge.rateLimiter.getAbort()) {
      alive++;
    }
  }
  return alive;
}
/**
 * Returns true if another long-running (&gt; 3 sec), sufficiently large merge of comparable size
 * (within a 0.3x..3.0x ratio of {@code merge}) is already running — a sign that merging has
 * fallen behind.
 */
private boolean isBacklog(long now, OneMerge merge) {
  final double mergeMB = bytesToMB(merge.estimatedMergeBytes);
  for (MergeThread mergeThread : mergeThreads) {
    final long mergeStartNS = mergeThread.merge.mergeStartNS;
    // Skip dead threads, ourselves, and merges that haven't started yet:
    if (!mergeThread.isAlive() || mergeThread.merge == merge || mergeStartNS == -1) {
      continue;
    }
    // Only consider big merges that have been running for a while:
    if (mergeThread.merge.estimatedMergeBytes < MIN_BIG_MERGE_MB * 1024 * 1024) {
      continue;
    }
    if (nsToSec(now - mergeStartNS) <= 3.0) {
      continue;
    }
    final double otherMergeMB = bytesToMB(mergeThread.merge.estimatedMergeBytes);
    final double ratio = otherMergeMB / mergeMB;
    if (ratio > 0.3 && ratio < 3.0) {
      return true;
    }
  }
  return false;
}
/**
 * Called by {@link IndexWriter} to run pending merges: drains the writer's merge queue, spawning
 * one {@link MergeThread} per merge, until the queue is empty or {@code maybeStall} tells us to
 * stop. On {@link MergeTrigger#CLOSING} the IO throttle is lifted so merges finish quickly.
 */
@Override
public synchronized void merge(IndexWriter writer, MergeTrigger trigger, boolean newMergesFound)
    throws IOException {

  // We must never run while holding the writer's lock (deadlock risk):
  assert !Thread.holdsLock(writer);

  initDynamicDefaults(writer);

  if (trigger == MergeTrigger.CLOSING) {
    // Disable throttling on close:
    targetMBPerSec = MAX_MERGE_MB_PER_SEC;
    updateMergeThreads();
  }

  // First, quickly run through the newly proposed merges
  // and add any orthogonal merges (ie a merge not
  // involving segments already pending to be merged) to
  // the queue. If we are way behind on merging, many of
  // these newly proposed merges will likely already be
  // registered.

  if (verbose()) {
    message("now merge");
    message(" index: " + writer.segString());
  }

  // Iterate, pulling from the IndexWriter's queue of
  // pending merges, until it's empty:
  while (true) {

    // maybeStall presumably blocks/aborts when too many merges are queued —
    // TODO(review): confirm its contract in the enclosing class.
    if (maybeStall(writer) == false) {
      break;
    }

    OneMerge merge = writer.getNextMerge();
    if (merge == null) {
      if (verbose()) {
        message(" no more merges pending; now return");
      }
      return;
    }

    // Possibly adjust IO throttle before the merge starts running:
    updateIOThrottle(merge);

    boolean success = false;
    try {
      if (verbose()) {
        message(" consider merge " + writer.segString(merge.segments));
      }

      // OK to spawn a new merge thread to handle this
      // merge:
      final MergeThread merger = getMergeThread(writer, merge);
      mergeThreads.add(merger);

      if (verbose()) {
        message(" launch new thread [" + merger.getName() + "]");
      }

      merger.start();
      updateMergeThreads();

      success = true;
    } finally {
      if (!success) {
        // Thread creation failed: tell the writer this merge is done so it
        // can release the merge's segments back for future merging.
        writer.mergeFinish(merge);
      }
    }
  }
}
/**
 * Called whenever the running merges have changed, to set merge IO limits. This method sorts the
 * merge threads by their merge size in descending order and then pauses/unpauses threads from
 * first to last -- that way, smaller merges are guaranteed to run before larger ones.
 */
protected synchronized void updateMergeThreads() {

  // Only look at threads that are alive & not in the
  // process of stopping (ie have an active merge):
  final List<MergeThread> activeMerges = new ArrayList<>();

  int threadIdx = 0;
  while (threadIdx < mergeThreads.size()) {
    final MergeThread mergeThread = mergeThreads.get(threadIdx);
    if (!mergeThread.isAlive()) {
      // Prune any dead threads
      mergeThreads.remove(threadIdx);
      continue;
    }
    activeMerges.add(mergeThread);
    threadIdx++;
  }

  // Sort the merge threads, largest first:
  CollectionUtil.timSort(activeMerges);

  final int activeMergeCount = activeMerges.size();

  int bigMergeCount = 0;

  // Scan from the end (smallest) to find the last index holding a "big" merge;
  // bigMergeCount then equals the number of big merges, assuming the sort above
  // is descending by estimated size (NOTE(review): depends on MergeThread's
  // compareTo, not visible here — confirm).
  for (threadIdx = activeMergeCount - 1; threadIdx >= 0; threadIdx--) {
    MergeThread mergeThread = activeMerges.get(threadIdx);
    if (mergeThread.merge.estimatedMergeBytes > MIN_BIG_MERGE_MB * 1024 * 1024) {
      bigMergeCount = 1 + threadIdx;
      break;
    }
  }

  long now = System.nanoTime();

  StringBuilder message;
  if (verbose()) {
    message = new StringBuilder();
    message.append(
        String.format(
            Locale.ROOT,
            "updateMergeThreads ioThrottle=%s targetMBPerSec=%.1f MB/sec",
            doAutoIOThrottle,
            targetMBPerSec));
  } else {
    message = null;
  }

  for (threadIdx = 0; threadIdx < activeMergeCount; threadIdx++) {
    MergeThread mergeThread = activeMerges.get(threadIdx);

    OneMerge merge = mergeThread.merge;

    // pause the thread if maxThreadCount is smaller than the number of merge threads.
    final boolean doPause = threadIdx < bigMergeCount - maxThreadCount;

    // Pick the new per-merge rate: 0 pauses the thread entirely; infinity
    // disables rate limiting; otherwise throttle to the current target.
    double newMBPerSec;
    if (doPause) {
      newMBPerSec = 0.0;
    } else if (merge.maxNumSegments != -1) {
      // forceMerge: use the dedicated forced-merge rate:
      newMBPerSec = forceMergeMBPerSec;
    } else if (doAutoIOThrottle == false) {
      newMBPerSec = Double.POSITIVE_INFINITY;
    } else if (merge.estimatedMergeBytes < MIN_BIG_MERGE_MB * 1024 * 1024) {
      // Don't rate limit small merges:
      newMBPerSec = Double.POSITIVE_INFINITY;
    } else {
      newMBPerSec = targetMBPerSec;
    }

    double curMBPerSec = merge.rateLimiter.getMBPerSec();

    if (verbose()) {
      long mergeStartNS = merge.mergeStartNS;
      if (mergeStartNS == -1) {
        // IndexWriter didn't start the merge yet:
        mergeStartNS = now;
      }
      message.append('\n');
      message.append(
          String.format(
              Locale.ROOT,
              "merge thread %s estSize=%.1f MB (written=%.1f MB) runTime=%.1fs (stopped=%.1fs, paused=%.1fs) rate=%s\n",
              mergeThread.getName(),
              bytesToMB(merge.estimatedMergeBytes),
              bytesToMB(merge.rateLimiter.totalBytesWritten),
              nsToSec(now - mergeStartNS),
              nsToSec(merge.rateLimiter.getTotalStoppedNS()),
              nsToSec(merge.rateLimiter.getTotalPausedNS()),
              rateToString(merge.rateLimiter.getMBPerSec())));

      if (newMBPerSec != curMBPerSec) {
        if (newMBPerSec == 0.0) {
          message.append(" now stop");
        } else if (curMBPerSec == 0.0) {
          if (newMBPerSec == Double.POSITIVE_INFINITY) {
            message.append(" now resume");
          } else {
            message.append(
                String.format(Locale.ROOT, " now resume to %.1f MB/sec", newMBPerSec));
          }
        } else {
          message.append(
              String.format(
                  Locale.ROOT,
                  " now change from %.1f MB/sec to %.1f MB/sec",
                  curMBPerSec,
                  newMBPerSec));
        }
      } else if (curMBPerSec == 0.0) {
        message.append(" leave stopped");
      } else {
        message.append(String.format(Locale.ROOT, " leave running at %.1f MB/sec", curMBPerSec));
      }
    }

    // Apply the (possibly unchanged) rate to this merge's limiter:
    merge.rateLimiter.setMBPerSec(newMBPerSec);
  }

  if (verbose()) {
    message(message.toString());
  }
}
@Test public void testInMemoryAndOnDiskMerger() throws Throwable { JobID jobId = new JobID("a", 0); TaskAttemptID reduceId1 = new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 0), 0); TaskAttemptID mapId1 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 1), 0); TaskAttemptID mapId2 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 2), 0); LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LOCAL_DIR); MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>( reduceId1, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null, null, null, new Progress(), new MROutputFiles()); // write map outputs Map<String, String> map1 = new TreeMap<String, String>(); map1.put("apple", "disgusting"); map1.put("carrot", "delicious"); Map<String, String> map2 = new TreeMap<String, String>(); map1.put("banana", "pretty good"); byte[] mapOutputBytes1 = writeMapOutput(conf, map1); byte[] mapOutputBytes2 = writeMapOutput(conf, map2); InMemoryMapOutput<Text, Text> mapOutput1 = new InMemoryMapOutput<Text, Text>( conf, mapId1, mergeManager, mapOutputBytes1.length, null, true); InMemoryMapOutput<Text, Text> mapOutput2 = new InMemoryMapOutput<Text, Text>( conf, mapId2, mergeManager, mapOutputBytes2.length, null, true); System.arraycopy(mapOutputBytes1, 0, mapOutput1.getMemory(), 0, mapOutputBytes1.length); System.arraycopy(mapOutputBytes2, 0, mapOutput2.getMemory(), 0, mapOutputBytes2.length); // create merger and run merge MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger = mergeManager.createInMemoryMerger(); List<InMemoryMapOutput<Text, Text>> mapOutputs1 = new ArrayList<InMemoryMapOutput<Text, Text>>(); mapOutputs1.add(mapOutput1); mapOutputs1.add(mapOutput2); inMemoryMerger.merge(mapOutputs1); Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size()); TaskAttemptID reduceId2 = new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 3), 0); TaskAttemptID mapId3 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 4), 0); 
TaskAttemptID mapId4 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 5), 0); // write map outputs Map<String, String> map3 = new TreeMap<String, String>(); map3.put("apple", "awesome"); map3.put("carrot", "amazing"); Map<String, String> map4 = new TreeMap<String, String>(); map4.put("banana", "bla"); byte[] mapOutputBytes3 = writeMapOutput(conf, map3); byte[] mapOutputBytes4 = writeMapOutput(conf, map4); InMemoryMapOutput<Text, Text> mapOutput3 = new InMemoryMapOutput<Text, Text>( conf, mapId3, mergeManager, mapOutputBytes3.length, null, true); InMemoryMapOutput<Text, Text> mapOutput4 = new InMemoryMapOutput<Text, Text>( conf, mapId4, mergeManager, mapOutputBytes4.length, null, true); System.arraycopy(mapOutputBytes3, 0, mapOutput3.getMemory(), 0, mapOutputBytes3.length); System.arraycopy(mapOutputBytes4, 0, mapOutput4.getMemory(), 0, mapOutputBytes4.length); // // create merger and run merge MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger2 = mergeManager.createInMemoryMerger(); List<InMemoryMapOutput<Text, Text>> mapOutputs2 = new ArrayList<InMemoryMapOutput<Text, Text>>(); mapOutputs2.add(mapOutput3); mapOutputs2.add(mapOutput4); inMemoryMerger2.merge(mapOutputs2); Assert.assertEquals(2, mergeManager.onDiskMapOutputs.size()); List<CompressAwarePath> paths = new ArrayList<CompressAwarePath>(); Iterator<CompressAwarePath> iterator = mergeManager.onDiskMapOutputs.iterator(); List<String> keys = new ArrayList<String>(); List<String> values = new ArrayList<String>(); while (iterator.hasNext()) { CompressAwarePath next = iterator.next(); readOnDiskMapOutput(conf, fs, next, keys, values); paths.add(next); } Assert.assertEquals( keys, Arrays.asList("apple", "banana", "carrot", "apple", "banana", "carrot")); Assert.assertEquals( values, Arrays.asList("awesome", "bla", "amazing", "disgusting", "pretty good", "delicious")); mergeManager.close(); mergeManager = new MergeManagerImpl<Text, Text>( reduceId2, jobConf, fs, lda, Reporter.NULL, null, null, 
null, null, null, null, null, new Progress(), new MROutputFiles()); MergeThread<CompressAwarePath, Text, Text> onDiskMerger = mergeManager.createOnDiskMerger(); onDiskMerger.merge(paths); Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size()); keys = new ArrayList<String>(); values = new ArrayList<String>(); readOnDiskMapOutput(conf, fs, mergeManager.onDiskMapOutputs.iterator().next(), keys, values); Assert.assertEquals( keys, Arrays.asList("apple", "apple", "banana", "banana", "carrot", "carrot")); Assert.assertEquals( values, Arrays.asList("awesome", "disgusting", "pretty good", "bla", "amazing", "delicious")); mergeManager.close(); Assert.assertEquals(0, mergeManager.inMemoryMapOutputs.size()); Assert.assertEquals(0, mergeManager.inMemoryMergedMapOutputs.size()); Assert.assertEquals(0, mergeManager.onDiskMapOutputs.size()); }