public void run() {
      long shortestObservedTimeAroundLoop = Long.MAX_VALUE;

      observedLasUpdateTime = consensusLatestTime.get();
      long now = TimeServices.nanoTime();
      long prevNow = now;
      consensusLatestTime.compareAndSet(observedLasUpdateTime, now);

      while ((stopThreadMask & threadMask) == 0) {
        if (sleepInterval != 0) {
          TimeServices.sleepNanos(sleepInterval);
        }

        // This is ***TEST FUNCTIONALITY***: Spin as long as we are externally asked to stall:
        while ((stallThreadMask & threadMask) != 0) ;

        observedLasUpdateTime = consensusLatestTime.get();
        // Volatile store above makes sure new "now" is measured after observedLasUpdateTime sample
        now = TimeServices.nanoTime();

        // Track shortest time around loop:
        shortestObservedTimeAroundLoop = Math.min(now - prevNow, shortestObservedTimeAroundLoop);

        // Update consensus time as long as it is is the past:
        while (now > observedLasUpdateTime) {
          if (consensusLatestTime.compareAndSet(observedLasUpdateTime, now)) {
            // Successfully and atomically moved consensus time forward. Act on the known delta:
            final long deltaTimeNs = now - observedLasUpdateTime;

            // Calculate hiccup time (accounting for known time around loop):
            long hiccupTime = Math.max(deltaTimeNs - shortestObservedTimeAroundLoop, 0);

            if (hiccupTime > pauseNotificationThreshold) {
              if (verbose) {
                System.out.println(
                    "SimplePauseDetector thread "
                        + threadNumber
                        + ": sending pause notification message: pause of "
                        + hiccupTime
                        + " nsec detected at nanoTime: "
                        + now);
              }
              notifyListeners(hiccupTime, now);
            }
          } else {
            // Failed to atomically move consensus time forward. Try again with current value:
            observedLasUpdateTime = consensusLatestTime.get();
          }
        }

        prevNow = now;
      }
      if (verbose) {
        System.out.println("SimplePauseDetector thread " + threadNumber + " terminating...");
      }
    }
Ejemplo n.º 2
0
    public void run() {
      observedLasUpdateTime = consensusLatestTime.get();
      long now = TimeServices.nanoTime();
      long prevNow = now;
      consensusLatestTime.compareAndSet(observedLasUpdateTime, now);

      long shortestObservedTimeAroundLoop = Long.MAX_VALUE;
      while ((stopThreadMask & threadMask) == 0) {
        if (sleepInterval != 0) {
          TimeServices.sleepNanos(sleepInterval);
        }

        // TEST FUNCTIONALITY: Spin as long as we are asked to stall:
        while ((stallTheadMask & threadMask) != 0) ;

        observedLasUpdateTime = consensusLatestTime.get();
        // Volatile store above makes sure new now is measured after observedLasUpdateTime sample
        now = TimeServices.nanoTime();

        final long timeAroundLoop = now - prevNow;
        if (timeAroundLoop < shortestObservedTimeAroundLoop) {
          shortestObservedTimeAroundLoop = timeAroundLoop;
        }

        // Move consensus forward and act on delta:
        if ((now > observedLasUpdateTime)
            && (consensusLatestTime.compareAndSet(observedLasUpdateTime, now))) {
          final long deltaTimeNs = now - observedLasUpdateTime;

          long hiccupTime = deltaTimeNs - shortestObservedTimeAroundLoop;
          hiccupTime = (hiccupTime < 0) ? 0 : hiccupTime;

          if (hiccupTime > pauseNotificationThreshold) {
            if (verbose) {
              System.out.println(
                  "SimplePauseDetector thread "
                      + threadNumber
                      + ": sending pause notification message: pause of "
                      + hiccupTime
                      + " nsec detected at nanoTime: "
                      + now
                      + " (sleepInterval = "
                      + sleepInterval
                      + " , shortest time around loop = "
                      + shortestObservedTimeAroundLoop
                      + ")");
            }
            notifyListeners(hiccupTime, now);
          }
        }
        prevNow = now;
      }
      if (verbose) {
        System.out.println("SimplePauseDetector thread " + threadNumber + " terminating...");
      }
    }
  /**
   * A test method that allows the caller to artificially stall a requested set of the detector
   * threads for a given amount of time. Used to verify pause detection when consensus occurs, as
   * well as lack of detection when it does not.
   *
   * @param threadNumberMask a mask designating which threads should be stalled.
   * @param stallLength stall length, in nanosecond units
   * @throws InterruptedException if internal sleep implementation throws it
   */
  public void stallDetectorThreads(long threadNumberMask, long stallLength)
      throws InterruptedException {
    long savedMask = stallThreadMask;
    stallThreadMask = threadNumberMask;

    long startTime = TimeServices.nanoTime();
    long endTime = startTime + stallLength;
    for (long remainingTime = stallLength;
        remainingTime > 0;
        remainingTime = endTime - TimeServices.nanoTime()) {
      long timeDelta = Math.min(remainingTime, (pauseNotificationThreshold / 2));
      TimeServices.moveTimeForward(timeDelta);
      TimeUnit.NANOSECONDS.sleep(50000); // give things a chance to propagate.
    }

    stallThreadMask = savedMask;
  }
Ejemplo n.º 4
0
  @Test
  public void testIntervalSampleDeadlock() throws Exception {

    SimplePauseDetector pauseDetector =
        new SimplePauseDetector(
            1000000L /* 1 msec sleep */,
            10000000L /* 10 msec reporting threshold */,
            3 /* thread count */,
            true /* verbose */);

    LatencyStats.setDefaultPauseDetector(pauseDetector);

    final LatencyStats latencyStats = new LatencyStats();

    pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + 115 * MSEC);

    TimeServices.moveTimeForward(5000L);
    TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate
    TimeServices.moveTimeForward(1000000L);
    TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate
    TimeServices.moveTimeForward(2000000L);
    TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate
    TimeServices.moveTimeForward(110000000L);
    TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate

    try {

      TimeUnit.NANOSECONDS.sleep(10 * MSEC); // Make sure things have some time to propagate

      long startTime = TimeServices.nanoTime();
      try {
        latencyStats.recordLatency(Long.MAX_VALUE);
      } catch (java.lang.IndexOutOfBoundsException e) {
        // Suppress, because this is what we expect
      }

      TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate

      ExecutorService executorService = Executors.newFixedThreadPool(1);
      Future<Boolean> future =
          executorService.submit(
              new Runnable() {
                @Override
                public void run() {
                  System.out.println("\nForcing Interval Update; may hang forever:\n");
                  latencyStats.forceIntervalSample();
                  System.out.println("\nCompleted forcing interval sample\n");
                }
              },
              Boolean.TRUE);

      try {
        Boolean response = future.get(5, TimeUnit.SECONDS);
        Assert.assertEquals(Boolean.TRUE, response);
      } catch (TimeoutException e) {
        System.err.println("\nFuture timed out.\n");
        System.out.println("\nMaking sure the thread dies.\n");
        try {
          Field f = LatencyStats.class.getDeclaredField("recordingEndEpoch");
          f.setAccessible(true);
          f.set(latencyStats, (Long) f.get(latencyStats) + 1);
          System.out.println(":" + future.get(5, TimeUnit.SECONDS));
        } catch (Throwable t) {
          t.printStackTrace();
        }
        Assert.fail("Timed out trying to force interval sample.");
      } finally {
        executorService.shutdownNow();
        System.out.println("\nSuccessfully forced interval sample to complete on test failure.\n");
      }

    } catch (InterruptedException ex) {
      // Suppress
    } finally {
      latencyStats.stop();
      pauseDetector.shutdown();
    }
  }
Ejemplo n.º 5
0
  @Test
  public void testLatencyStats() throws Exception {

    SimplePauseDetector pauseDetector =
        new SimplePauseDetector(
            1000000L /* 1 msec sleep */,
            10000000L /* 10 msec reporting threshold */,
            3 /* thread count */,
            true /* verbose */);

    LatencyStats.setDefaultPauseDetector(pauseDetector);

    LatencyStats latencyStats = new LatencyStats();

    pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + 115 * MSEC);

    TimeServices.moveTimeForward(5000L);
    TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate
    TimeServices.moveTimeForward(1000000L);
    TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate
    TimeServices.moveTimeForward(2000000L);
    TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate
    TimeServices.moveTimeForward(110000000L);
    TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate

    try {

      TimeUnit.NANOSECONDS.sleep(10 * MSEC); // Make sure things have some time to propagate

      long startTime = TimeServices.nanoTime();
      System.out.println(
          "@ "
              + (TimeServices.nanoTime() - startTime)
              + " nsec after start: startTime = "
              + startTime);

      long lastTime = startTime;
      for (int i = 0; i < 2000; i++) {
        pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + (4 * MSEC));
        TimeServices.moveTimeForwardMsec(5);
        //                TimeUnit.NANOSECONDS.sleep(100000L); // Give things have some time to
        // propagate
        long now = TimeServices.nanoTime();
        latencyStats.recordLatency(now - lastTime);
        lastTime = now;
      }

      TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate

      // @ 10 sec from start
      System.out.println("\nForcing Interval Update:\n");
      latencyStats.forceIntervalSample();

      System.out.println("@ " + (TimeServices.nanoTime() - startTime) + " nsec after start:");
      System.out.println(
          "Estimated interval = "
              + latencyStats.getIntervalEstimator().getEstimatedInterval(lastTime));

      Histogram accumulatedHistogram = latencyStats.getAccumulatedHistogram();
      Histogram intervalHistogram = latencyStats.getIntervalHistogram();

      System.out.println(
          "Accumulated Average latency for 5msec sleeps: "
              + accumulatedHistogram.getMean()
              + ", count = "
              + accumulatedHistogram.getTotalCount());
      System.out.println(
          "Interval Average latency for 5msec sleeps: "
              + intervalHistogram.getMean()
              + ", count = "
              + intervalHistogram.getTotalCount());

      Assert.assertEquals(
          "Accumulated total count should be 2000", 2000, accumulatedHistogram.getTotalCount());

      System.out.println("Pausing detector threads for 5 seconds:");
      pauseDetector.stallDetectorThreads(0x7, 5000 * MSEC);
      TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate

      // @ 15 sec from start

      // Report without forcing interval measurement update:

      System.out.println("@ " + (TimeServices.nanoTime() - startTime) + " nsec after start:");
      System.out.println(
          "Estimated interval = "
              + latencyStats.getIntervalEstimator().getEstimatedInterval(TimeServices.nanoTime()));
      accumulatedHistogram = latencyStats.getAccumulatedHistogram();
      intervalHistogram = latencyStats.getIntervalHistogram();

      System.out.println(
          "Post-pause, pre-observation Accumulated Average latency for 5msec sleeps: "
              + accumulatedHistogram.getMean()
              + ", count = "
              + accumulatedHistogram.getTotalCount());
      System.out.println(
          "Post-pause, pre-observation Interval Average latency for 5msec sleeps: "
              + intervalHistogram.getMean()
              + ", count = "
              + intervalHistogram.getTotalCount());

      Assert.assertEquals(
          "Accumulated total count should be 2000", 2000, accumulatedHistogram.getTotalCount());

      // Still @ 15 sec from start
      System.out.println("\nForcing Interval Update:\n");
      latencyStats.forceIntervalSample();

      System.out.println("@ " + (TimeServices.nanoTime() - startTime) + " nsec after start:");
      System.out.println(
          "Estimated interval = "
              + latencyStats.getIntervalEstimator().getEstimatedInterval(TimeServices.nanoTime()));
      accumulatedHistogram = latencyStats.getAccumulatedHistogram();
      intervalHistogram = latencyStats.getIntervalHistogram();

      System.out.println(
          "Post-pause, post-observation Accumulated Average latency for 5msec sleeps: "
              + accumulatedHistogram.getMean()
              + ", count = "
              + accumulatedHistogram.getTotalCount());
      System.out.println(
          "Post-pause, post-observation Interval Average latency for 5msec sleeps: "
              + intervalHistogram.getMean()
              + ", count = "
              + intervalHistogram.getTotalCount());

      Assert.assertEquals(
          "Accumulated total count should be 2000", 2998, accumulatedHistogram.getTotalCount());

      pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + (500 * MSEC));
      TimeServices.moveTimeForwardMsec(500);
      TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate

      // @ 15.5 sec from start
      System.out.println("\nForcing Interval Update:\n");
      latencyStats.forceIntervalSample();

      System.out.println("@ " + (TimeServices.nanoTime() - startTime) + " nsec after start:");
      System.out.println(
          "Estimated interval = "
              + latencyStats.getIntervalEstimator().getEstimatedInterval(TimeServices.nanoTime()));
      accumulatedHistogram = latencyStats.getAccumulatedHistogram();
      intervalHistogram = latencyStats.getIntervalHistogram();

      System.out.println(
          "Post-pause Accumulated Average latency for 5msec sleeps: "
              + accumulatedHistogram.getMean()
              + ", count = "
              + accumulatedHistogram.getTotalCount());
      System.out.println(
          "Post-pause Interval Average latency for 5msec sleeps: "
              + intervalHistogram.getMean()
              + ", count = "
              + intervalHistogram.getTotalCount());

      Assert.assertEquals(
          "Accumulated total count should be 2000", 2998, accumulatedHistogram.getTotalCount());

      pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + (500 * MSEC));
      TimeServices.moveTimeForwardMsec(500);
      TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate

      // @ 16 sec from start
      System.out.println("\nForcing Interval Update:\n");
      latencyStats.forceIntervalSample();

      System.out.println("@ " + (TimeServices.nanoTime() - startTime) + " nsec after start:");
      System.out.println(
          "Estimated interval = "
              + latencyStats.getIntervalEstimator().getEstimatedInterval(TimeServices.nanoTime()));
      accumulatedHistogram = latencyStats.getAccumulatedHistogram();
      intervalHistogram = latencyStats.getIntervalHistogram();

      System.out.println(
          "Post-pause Accumulated Average latency for 5msec sleeps: "
              + accumulatedHistogram.getMean()
              + ", count = "
              + accumulatedHistogram.getTotalCount());
      System.out.println(
          "Post-pause Interval Average latency for 5msec sleeps: "
              + intervalHistogram.getMean()
              + ", count = "
              + intervalHistogram.getTotalCount());

      Assert.assertEquals(
          "Accumulated total count should be 2999", 2998, accumulatedHistogram.getTotalCount());

      pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + (2000 * MSEC));
      TimeServices.moveTimeForwardMsec(2000);
      TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate

      // @ 18 sec from start
      System.out.println("\nForcing Interval Update:\n");
      latencyStats.forceIntervalSample();

      System.out.println("@ " + (TimeServices.nanoTime() - startTime) + " nsec after start:");
      System.out.println(
          "Estimated interval = "
              + latencyStats.getIntervalEstimator().getEstimatedInterval(TimeServices.nanoTime()));
      accumulatedHistogram = latencyStats.getAccumulatedHistogram();
      intervalHistogram = latencyStats.getIntervalHistogram();

      System.out.println(
          "Post-pause Accumulated Average latency for 5msec sleeps: "
              + accumulatedHistogram.getMean()
              + ", count = "
              + accumulatedHistogram.getTotalCount());
      System.out.println(
          "Post-pause Interval Average latency for 5msec sleeps: "
              + intervalHistogram.getMean()
              + ", count = "
              + intervalHistogram.getTotalCount());

      Assert.assertEquals(
          "Accumulated total count should be 2999", 2998, accumulatedHistogram.getTotalCount());

      for (int i = 0; i < 100; i++) {
        pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + (5 * MSEC));
        TimeServices.moveTimeForwardMsec(5);
        long now = TimeServices.nanoTime();
        latencyStats.recordLatency(now - lastTime);
        lastTime = now;
      }

      pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + (500 * MSEC));
      TimeServices.moveTimeForwardMsec(500);
      TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate

      // @ 19 sec from start
      System.out.println("\nForcing Interval Update:\n");
      latencyStats.forceIntervalSample();

      System.out.println("@ " + (TimeServices.nanoTime() - startTime) + " nsec after start:");
      System.out.println(
          "Estimated interval = "
              + latencyStats.getIntervalEstimator().getEstimatedInterval(TimeServices.nanoTime()));
      accumulatedHistogram = latencyStats.getAccumulatedHistogram();
      intervalHistogram = latencyStats.getIntervalHistogram();
      System.out.println(
          "Post-pause Accumulated Average latency for 5msec sleeps: "
              + accumulatedHistogram.getMean()
              + ", count = "
              + accumulatedHistogram.getTotalCount());
      System.out.println(
          "Post-pause Interval Average latency for 5msec sleeps: "
              + intervalHistogram.getMean()
              + ", count = "
              + intervalHistogram.getTotalCount());

      pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + (500 * MSEC));
      TimeServices.moveTimeForwardMsec(500);
      TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate

      // @ 19.5 sec from start
      System.out.println("\nForcing Interval Update:\n");
      latencyStats.forceIntervalSample();

      System.out.println("@ " + (TimeServices.nanoTime() - startTime) + " nsec after start:");
      System.out.println(
          "Estimated interval = "
              + latencyStats.getIntervalEstimator().getEstimatedInterval(TimeServices.nanoTime()));
      accumulatedHistogram = latencyStats.getAccumulatedHistogram();
      intervalHistogram = latencyStats.getIntervalHistogram();
      System.out.println(
          "Post-pause Accumulated Average latency for 5msec sleeps: "
              + accumulatedHistogram.getMean()
              + ", count = "
              + accumulatedHistogram.getTotalCount());
      System.out.println(
          "Post-pause Interval Average latency for 5msec sleeps: "
              + intervalHistogram.getMean()
              + ", count = "
              + intervalHistogram.getTotalCount());

      Assert.assertEquals(
          "Accumulated total count should be 2000", 3098, accumulatedHistogram.getTotalCount());
    } catch (InterruptedException ex) {

    }

    latencyStats.stop();

    pauseDetector.shutdown();
  }