/** Create and return a new MergeThread */
 protected synchronized MergeThread getMergeThread(IndexWriter writer, OneMerge merge)
     throws IOException {
   final MergeThread thread = new MergeThread(writer, merge);
   thread.setDaemon(true);
   thread.setName("Lucene Merge Thread #" + mergeThreadCount++);
   return thread;
 }
 /**
  * Wait for any running merge threads to finish. This call is not interruptible as used by {@link
  * #close()}.
  */
 public void sync() {
   boolean interrupted = false;
   try {
     while (true) {
       MergeThread toSync = null;
       synchronized (this) {
         for (MergeThread t : mergeThreads) {
           if (t.isAlive()) {
             toSync = t;
             break;
           }
         }
       }
       if (toSync != null) {
         try {
           toSync.join();
         } catch (InterruptedException ie) {
           // ignore this Exception, we will retry until all threads are dead
           interrupted = true;
         }
       } else {
         break;
       }
     }
   } finally {
     // finally, restore interrupt status:
     if (interrupted) Thread.currentThread().interrupt();
   }
 }
 /**
  * Returns the number of merge threads that are alive, ignoring the calling thread if it is a
  * merge thread. Note that this number is ≤ {@link #mergeThreads} size.
  *
  * @lucene.internal
  */
 public synchronized int mergeThreadCount() {
   Thread currentThread = Thread.currentThread();
   int count = 0;
   for (MergeThread mergeThread : mergeThreads) {
     if (currentThread != mergeThread
         && mergeThread.isAlive()
         && mergeThread.merge.rateLimiter.getAbort() == false) {
       count++;
     }
   }
   return count;
 }
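  /**
   * Returns true if another live big merge (>= MIN_BIG_MERGE_MB) of comparable size (within
   * 0.3x-3.0x of this merge's estimated bytes) has already been running for more than three
   * seconds, i.e. merging is falling behind.
   */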
  private boolean isBacklog(long now, OneMerge merge) {
    double mergeMB = bytesToMB(merge.estimatedMergeBytes);
    for (MergeThread mergeThread : mergeThreads) {
      long mergeStartNS = mergeThread.merge.mergeStartNS;
      if (mergeThread.isAlive()
          && mergeThread.merge != merge
          && mergeStartNS != -1
          && mergeThread.merge.estimatedMergeBytes >= MIN_BIG_MERGE_MB * 1024 * 1024
          && nsToSec(now - mergeStartNS) > 3.0) {
        double otherMergeMB = bytesToMB(mergeThread.merge.estimatedMergeBytes);
        double ratio = otherMergeMB / mergeMB;
        if (ratio > 0.3 && ratio < 3.0) {
          return true;
        }
      }
    }

    return false;
  }
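  /**
   * Pulls pending merges from the IndexWriter and launches a new merge thread for each one,
   * possibly stalling the calling thread via {@link #maybeStall} when too many merges are queued.
   */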
  @Override
  public synchronized void merge(IndexWriter writer, MergeTrigger trigger, boolean newMergesFound)
      throws IOException {

    assert !Thread.holdsLock(writer);

    initDynamicDefaults(writer);

    if (trigger == MergeTrigger.CLOSING) {
      // Disable throttling on close:
      targetMBPerSec = MAX_MERGE_MB_PER_SEC;
      updateMergeThreads();
    }

    // First, quickly run through the newly proposed merges
    // and add any orthogonal merges (ie a merge not
    // involving segments already pending to be merged) to
    // the queue.  If we are way behind on merging, many of
    // these newly proposed merges will likely already be
    // registered.

    if (verbose()) {
      message("now merge");
      message("  index: " + writer.segString());
    }

    // Iterate, pulling from the IndexWriter's queue of
    // pending merges, until it's empty:
    while (true) {

      if (maybeStall(writer) == false) {
        break;
      }

      OneMerge merge = writer.getNextMerge();
      if (merge == null) {
        if (verbose()) {
          message("  no more merges pending; now return");
        }
        return;
      }

      updateIOThrottle(merge);

      boolean success = false;
      try {
        if (verbose()) {
          message("  consider merge " + writer.segString(merge.segments));
        }

        // OK to spawn a new merge thread to handle this
        // merge:
        final MergeThread merger = getMergeThread(writer, merge);
        mergeThreads.add(merger);

        if (verbose()) {
          message("    launch new thread [" + merger.getName() + "]");
        }

        merger.start();
        updateMergeThreads();

        success = true;
      } finally {
        if (!success) {
          writer.mergeFinish(merge);
        }
      }
    }
  }
  /**
   * Called whenever the running merges have changed, to set merge IO limits. This method sorts the
   * merge threads by their merge size in descending order and then pauses/unpauses threads from
   * first to last -- that way, smaller merges are guaranteed to run before larger ones.
   */
  protected synchronized void updateMergeThreads() {

    // Only look at threads that are alive & not in the
    // process of stopping (ie have an active merge):
    final List<MergeThread> activeMerges = new ArrayList<>();

    int threadIdx = 0;
    while (threadIdx < mergeThreads.size()) {
      final MergeThread mergeThread = mergeThreads.get(threadIdx);
      if (!mergeThread.isAlive()) {
        // Prune any dead threads
        mergeThreads.remove(threadIdx);
        continue;
      }
      activeMerges.add(mergeThread);
      threadIdx++;
    }

    // Sort the merge threads, largest first:
    CollectionUtil.timSort(activeMerges);

    final int activeMergeCount = activeMerges.size();

    int bigMergeCount = 0;

    for (threadIdx = activeMergeCount - 1; threadIdx >= 0; threadIdx--) {
      MergeThread mergeThread = activeMerges.get(threadIdx);
      if (mergeThread.merge.estimatedMergeBytes > MIN_BIG_MERGE_MB * 1024 * 1024) {
        bigMergeCount = 1 + threadIdx;
        break;
      }
    }

    long now = System.nanoTime();

    StringBuilder message;
    if (verbose()) {
      message = new StringBuilder();
      message.append(
          String.format(
              Locale.ROOT,
              "updateMergeThreads ioThrottle=%s targetMBPerSec=%.1f MB/sec",
              doAutoIOThrottle,
              targetMBPerSec));
    } else {
      message = null;
    }

    for (threadIdx = 0; threadIdx < activeMergeCount; threadIdx++) {
      MergeThread mergeThread = activeMerges.get(threadIdx);

      OneMerge merge = mergeThread.merge;

      // Pause the largest merges when more big merges are running than maxThreadCount allows:
      final boolean doPause = threadIdx < bigMergeCount - maxThreadCount;

      double newMBPerSec;
      if (doPause) {
        newMBPerSec = 0.0;
      } else if (merge.maxNumSegments != -1) {
        newMBPerSec = forceMergeMBPerSec;
      } else if (doAutoIOThrottle == false) {
        newMBPerSec = Double.POSITIVE_INFINITY;
      } else if (merge.estimatedMergeBytes < MIN_BIG_MERGE_MB * 1024 * 1024) {
        // Don't rate limit small merges:
        newMBPerSec = Double.POSITIVE_INFINITY;
      } else {
        newMBPerSec = targetMBPerSec;
      }

      double curMBPerSec = merge.rateLimiter.getMBPerSec();

      if (verbose()) {
        long mergeStartNS = merge.mergeStartNS;
        if (mergeStartNS == -1) {
          // IndexWriter didn't start the merge yet:
          mergeStartNS = now;
        }
        message.append('\n');
        message.append(
            String.format(
                Locale.ROOT,
                "merge thread %s estSize=%.1f MB (written=%.1f MB) runTime=%.1fs (stopped=%.1fs, paused=%.1fs) rate=%s\n",
                mergeThread.getName(),
                bytesToMB(merge.estimatedMergeBytes),
                bytesToMB(merge.rateLimiter.totalBytesWritten),
                nsToSec(now - mergeStartNS),
                nsToSec(merge.rateLimiter.getTotalStoppedNS()),
                nsToSec(merge.rateLimiter.getTotalPausedNS()),
                rateToString(merge.rateLimiter.getMBPerSec())));

        if (newMBPerSec != curMBPerSec) {
          if (newMBPerSec == 0.0) {
            message.append("  now stop");
          } else if (curMBPerSec == 0.0) {
            if (newMBPerSec == Double.POSITIVE_INFINITY) {
              message.append("  now resume");
            } else {
              message.append(
                  String.format(Locale.ROOT, "  now resume to %.1f MB/sec", newMBPerSec));
            }
          } else {
            message.append(
                String.format(
                    Locale.ROOT,
                    "  now change from %.1f MB/sec to %.1f MB/sec",
                    curMBPerSec,
                    newMBPerSec));
          }
        } else if (curMBPerSec == 0.0) {
          message.append("  leave stopped");
        } else {
          message.append(String.format(Locale.ROOT, "  leave running at %.1f MB/sec", curMBPerSec));
        }
      }

      merge.rateLimiter.setMBPerSec(newMBPerSec);
    }
    if (verbose()) {
      message(message.toString());
    }
  }
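
For context, here is a minimal sketch of how a ConcurrentMergeScheduler like the one above is typically installed on an IndexWriter. The Directory `dir` and Analyzer `analyzer` are assumed placeholders and the numeric limits are illustrative, not recommendations; the calls shown are the public Lucene API (org.apache.lucene.index, org.apache.lucene.analysis).

// Usage sketch (assumes an existing Directory `dir` and Analyzer `analyzer`):
ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
cms.setMaxMergesAndThreads(6, 2); // allow up to 6 queued merges, 2 concurrent merge threads
cms.enableAutoIOThrottle();       // let the scheduler adapt targetMBPerSec to the merge backlog

IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
iwc.setMergeScheduler(cms);
try (IndexWriter writer = new IndexWriter(dir, iwc)) {
  // add/update documents; merges are scheduled and throttled by cms
}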
Example #7
  @Test
  public void testInMemoryAndOnDiskMerger() throws Throwable {
    JobID jobId = new JobID("a", 0);
    TaskAttemptID reduceId1 = new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 0), 0);
    TaskAttemptID mapId1 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 1), 0);
    TaskAttemptID mapId2 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 2), 0);

    LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LOCAL_DIR);

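    // After the reduce id, conf, filesystem, dir allocator, and reporter, the remaining null
    // arguments are optional collaborators (compression codec, combiner, counters, exception
    // reporter) that this test does not exercise.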
    MergeManagerImpl<Text, Text> mergeManager =
        new MergeManagerImpl<Text, Text>(
            reduceId1,
            jobConf,
            fs,
            lda,
            Reporter.NULL,
            null,
            null,
            null,
            null,
            null,
            null,
            null,
            new Progress(),
            new MROutputFiles());

    // write map outputs
    Map<String, String> map1 = new TreeMap<String, String>();
    map1.put("apple", "disgusting");
    map1.put("carrot", "delicious");
    Map<String, String> map2 = new TreeMap<String, String>();
    map1.put("banana", "pretty good");
    byte[] mapOutputBytes1 = writeMapOutput(conf, map1);
    byte[] mapOutputBytes2 = writeMapOutput(conf, map2);
    InMemoryMapOutput<Text, Text> mapOutput1 =
        new InMemoryMapOutput<Text, Text>(
            conf, mapId1, mergeManager, mapOutputBytes1.length, null, true);
    InMemoryMapOutput<Text, Text> mapOutput2 =
        new InMemoryMapOutput<Text, Text>(
            conf, mapId2, mergeManager, mapOutputBytes2.length, null, true);
    System.arraycopy(mapOutputBytes1, 0, mapOutput1.getMemory(), 0, mapOutputBytes1.length);
    System.arraycopy(mapOutputBytes2, 0, mapOutput2.getMemory(), 0, mapOutputBytes2.length);

    // create merger and run merge
    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger =
        mergeManager.createInMemoryMerger();
    List<InMemoryMapOutput<Text, Text>> mapOutputs1 =
        new ArrayList<InMemoryMapOutput<Text, Text>>();
    mapOutputs1.add(mapOutput1);
    mapOutputs1.add(mapOutput2);

    inMemoryMerger.merge(mapOutputs1);

    Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());

    TaskAttemptID reduceId2 = new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 3), 0);
    TaskAttemptID mapId3 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 4), 0);
    TaskAttemptID mapId4 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 5), 0);
    // write map outputs
    Map<String, String> map3 = new TreeMap<String, String>();
    map3.put("apple", "awesome");
    map3.put("carrot", "amazing");
    Map<String, String> map4 = new TreeMap<String, String>();
    map4.put("banana", "bla");
    byte[] mapOutputBytes3 = writeMapOutput(conf, map3);
    byte[] mapOutputBytes4 = writeMapOutput(conf, map4);
    InMemoryMapOutput<Text, Text> mapOutput3 =
        new InMemoryMapOutput<Text, Text>(
            conf, mapId3, mergeManager, mapOutputBytes3.length, null, true);
    InMemoryMapOutput<Text, Text> mapOutput4 =
        new InMemoryMapOutput<Text, Text>(
            conf, mapId4, mergeManager, mapOutputBytes4.length, null, true);
    System.arraycopy(mapOutputBytes3, 0, mapOutput3.getMemory(), 0, mapOutputBytes3.length);
    System.arraycopy(mapOutputBytes4, 0, mapOutput4.getMemory(), 0, mapOutputBytes4.length);

    // create merger and run merge
    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger2 =
        mergeManager.createInMemoryMerger();
    List<InMemoryMapOutput<Text, Text>> mapOutputs2 =
        new ArrayList<InMemoryMapOutput<Text, Text>>();
    mapOutputs2.add(mapOutput3);
    mapOutputs2.add(mapOutput4);

    inMemoryMerger2.merge(mapOutputs2);

    Assert.assertEquals(2, mergeManager.onDiskMapOutputs.size());

    List<CompressAwarePath> paths = new ArrayList<CompressAwarePath>();
    Iterator<CompressAwarePath> iterator = mergeManager.onDiskMapOutputs.iterator();
    List<String> keys = new ArrayList<String>();
    List<String> values = new ArrayList<String>();
    while (iterator.hasNext()) {
      CompressAwarePath next = iterator.next();
      readOnDiskMapOutput(conf, fs, next, keys, values);
      paths.add(next);
    }
    Assert.assertEquals(
        keys, Arrays.asList("apple", "banana", "carrot", "apple", "banana", "carrot"));
    Assert.assertEquals(
        values,
        Arrays.asList("awesome", "bla", "amazing", "disgusting", "pretty good", "delicious"));
    mergeManager.close();

    mergeManager =
        new MergeManagerImpl<Text, Text>(
            reduceId2,
            jobConf,
            fs,
            lda,
            Reporter.NULL,
            null,
            null,
            null,
            null,
            null,
            null,
            null,
            new Progress(),
            new MROutputFiles());

    MergeThread<CompressAwarePath, Text, Text> onDiskMerger = mergeManager.createOnDiskMerger();
    onDiskMerger.merge(paths);

    Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());

    keys = new ArrayList<String>();
    values = new ArrayList<String>();
    readOnDiskMapOutput(conf, fs, mergeManager.onDiskMapOutputs.iterator().next(), keys, values);
    Assert.assertEquals(
        keys, Arrays.asList("apple", "apple", "banana", "banana", "carrot", "carrot"));
    Assert.assertEquals(
        values,
        Arrays.asList("awesome", "disgusting", "pretty good", "bla", "amazing", "delicious"));

    mergeManager.close();
    Assert.assertEquals(0, mergeManager.inMemoryMapOutputs.size());
    Assert.assertEquals(0, mergeManager.inMemoryMergedMapOutputs.size());
    Assert.assertEquals(0, mergeManager.onDiskMapOutputs.size());
  }
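
The test above runs two in-memory merges, each of which spills a combined, sorted file to disk; it then verifies the contents of those two files, feeds them to an on-disk merger, and checks that the final single output interleaves all keys in sorted order. After close(), the manager should track no in-memory or on-disk outputs.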