public void run() { long shortestObservedTimeAroundLoop = Long.MAX_VALUE; observedLasUpdateTime = consensusLatestTime.get(); long now = TimeServices.nanoTime(); long prevNow = now; consensusLatestTime.compareAndSet(observedLasUpdateTime, now); while ((stopThreadMask & threadMask) == 0) { if (sleepInterval != 0) { TimeServices.sleepNanos(sleepInterval); } // This is ***TEST FUNCTIONALITY***: Spin as long as we are externally asked to stall: while ((stallThreadMask & threadMask) != 0) ; observedLasUpdateTime = consensusLatestTime.get(); // Volatile store above makes sure new "now" is measured after observedLasUpdateTime sample now = TimeServices.nanoTime(); // Track shortest time around loop: shortestObservedTimeAroundLoop = Math.min(now - prevNow, shortestObservedTimeAroundLoop); // Update consensus time as long as it is is the past: while (now > observedLasUpdateTime) { if (consensusLatestTime.compareAndSet(observedLasUpdateTime, now)) { // Successfully and atomically moved consensus time forward. Act on the known delta: final long deltaTimeNs = now - observedLasUpdateTime; // Calculate hiccup time (accounting for known time around loop): long hiccupTime = Math.max(deltaTimeNs - shortestObservedTimeAroundLoop, 0); if (hiccupTime > pauseNotificationThreshold) { if (verbose) { System.out.println( "SimplePauseDetector thread " + threadNumber + ": sending pause notification message: pause of " + hiccupTime + " nsec detected at nanoTime: " + now); } notifyListeners(hiccupTime, now); } } else { // Failed to atomically move consensus time forward. Try again with current value: observedLasUpdateTime = consensusLatestTime.get(); } } prevNow = now; } if (verbose) { System.out.println("SimplePauseDetector thread " + threadNumber + " terminating..."); } }
public void run() { observedLasUpdateTime = consensusLatestTime.get(); long now = TimeServices.nanoTime(); long prevNow = now; consensusLatestTime.compareAndSet(observedLasUpdateTime, now); long shortestObservedTimeAroundLoop = Long.MAX_VALUE; while ((stopThreadMask & threadMask) == 0) { if (sleepInterval != 0) { TimeServices.sleepNanos(sleepInterval); } // TEST FUNCTIONALITY: Spin as long as we are asked to stall: while ((stallTheadMask & threadMask) != 0) ; observedLasUpdateTime = consensusLatestTime.get(); // Volatile store above makes sure new now is measured after observedLasUpdateTime sample now = TimeServices.nanoTime(); final long timeAroundLoop = now - prevNow; if (timeAroundLoop < shortestObservedTimeAroundLoop) { shortestObservedTimeAroundLoop = timeAroundLoop; } // Move consensus forward and act on delta: if ((now > observedLasUpdateTime) && (consensusLatestTime.compareAndSet(observedLasUpdateTime, now))) { final long deltaTimeNs = now - observedLasUpdateTime; long hiccupTime = deltaTimeNs - shortestObservedTimeAroundLoop; hiccupTime = (hiccupTime < 0) ? 0 : hiccupTime; if (hiccupTime > pauseNotificationThreshold) { if (verbose) { System.out.println( "SimplePauseDetector thread " + threadNumber + ": sending pause notification message: pause of " + hiccupTime + " nsec detected at nanoTime: " + now + " (sleepInterval = " + sleepInterval + " , shortest time around loop = " + shortestObservedTimeAroundLoop + ")"); } notifyListeners(hiccupTime, now); } } prevNow = now; } if (verbose) { System.out.println("SimplePauseDetector thread " + threadNumber + " terminating..."); } }
/** * A test method that allows the caller to artificially stall a requested set of the detector * threads for a given amount of time. Used to verify pause detection when consensus occurs, as * well as lack of detection when it does not. * * @param threadNumberMask a mask designating which threads should be stalled. * @param stallLength stall length, in nanosecond units * @throws InterruptedException if internal sleep implementation throws it */ public void stallDetectorThreads(long threadNumberMask, long stallLength) throws InterruptedException { long savedMask = stallThreadMask; stallThreadMask = threadNumberMask; long startTime = TimeServices.nanoTime(); long endTime = startTime + stallLength; for (long remainingTime = stallLength; remainingTime > 0; remainingTime = endTime - TimeServices.nanoTime()) { long timeDelta = Math.min(remainingTime, (pauseNotificationThreshold / 2)); TimeServices.moveTimeForward(timeDelta); TimeUnit.NANOSECONDS.sleep(50000); // give things a chance to propagate. } stallThreadMask = savedMask; }
@Test public void testIntervalSampleDeadlock() throws Exception { SimplePauseDetector pauseDetector = new SimplePauseDetector( 1000000L /* 1 msec sleep */, 10000000L /* 10 msec reporting threshold */, 3 /* thread count */, true /* verbose */); LatencyStats.setDefaultPauseDetector(pauseDetector); final LatencyStats latencyStats = new LatencyStats(); pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + 115 * MSEC); TimeServices.moveTimeForward(5000L); TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate TimeServices.moveTimeForward(1000000L); TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate TimeServices.moveTimeForward(2000000L); TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate TimeServices.moveTimeForward(110000000L); TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate try { TimeUnit.NANOSECONDS.sleep(10 * MSEC); // Make sure things have some time to propagate long startTime = TimeServices.nanoTime(); try { latencyStats.recordLatency(Long.MAX_VALUE); } catch (java.lang.IndexOutOfBoundsException e) { // Suppress, because this is what we expect } TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate ExecutorService executorService = Executors.newFixedThreadPool(1); Future<Boolean> future = executorService.submit( new Runnable() { @Override public void run() { System.out.println("\nForcing Interval Update; may hang forever:\n"); latencyStats.forceIntervalSample(); System.out.println("\nCompleted forcing interval sample\n"); } }, Boolean.TRUE); try { Boolean response = future.get(5, TimeUnit.SECONDS); Assert.assertEquals(Boolean.TRUE, response); } catch (TimeoutException e) { System.err.println("\nFuture timed out.\n"); System.out.println("\nMaking sure the thread dies.\n"); try { Field f = LatencyStats.class.getDeclaredField("recordingEndEpoch"); f.setAccessible(true); f.set(latencyStats, (Long) f.get(latencyStats) + 1); System.out.println(":" + future.get(5, TimeUnit.SECONDS)); } catch (Throwable t) { t.printStackTrace(); } Assert.fail("Timed out trying to force interval sample."); } finally { executorService.shutdownNow(); System.out.println("\nSuccessfully forced interval sample to complete on test failure.\n"); } } catch (InterruptedException ex) { // Suppress } finally { latencyStats.stop(); pauseDetector.shutdown(); } }
@Test public void testLatencyStats() throws Exception { SimplePauseDetector pauseDetector = new SimplePauseDetector( 1000000L /* 1 msec sleep */, 10000000L /* 10 msec reporting threshold */, 3 /* thread count */, true /* verbose */); LatencyStats.setDefaultPauseDetector(pauseDetector); LatencyStats latencyStats = new LatencyStats(); pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + 115 * MSEC); TimeServices.moveTimeForward(5000L); TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate TimeServices.moveTimeForward(1000000L); TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate TimeServices.moveTimeForward(2000000L); TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate TimeServices.moveTimeForward(110000000L); TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate try { TimeUnit.NANOSECONDS.sleep(10 * MSEC); // Make sure things have some time to propagate long startTime = TimeServices.nanoTime(); System.out.println( "@ " + (TimeServices.nanoTime() - startTime) + " nsec after start: startTime = " + startTime); long lastTime = startTime; for (int i = 0; i < 2000; i++) { pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + (4 * MSEC)); TimeServices.moveTimeForwardMsec(5); // TimeUnit.NANOSECONDS.sleep(100000L); // Give things have some time to // propagate long now = TimeServices.nanoTime(); latencyStats.recordLatency(now - lastTime); lastTime = now; } TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate // @ 10 sec from start System.out.println("\nForcing Interval Update:\n"); latencyStats.forceIntervalSample(); System.out.println("@ " + (TimeServices.nanoTime() - startTime) + " nsec after start:"); System.out.println( "Estimated interval = " + latencyStats.getIntervalEstimator().getEstimatedInterval(lastTime)); Histogram accumulatedHistogram = latencyStats.getAccumulatedHistogram(); Histogram intervalHistogram = latencyStats.getIntervalHistogram(); System.out.println( "Accumulated Average latency for 5msec sleeps: " + accumulatedHistogram.getMean() + ", count = " + accumulatedHistogram.getTotalCount()); System.out.println( "Interval Average latency for 5msec sleeps: " + intervalHistogram.getMean() + ", count = " + intervalHistogram.getTotalCount()); Assert.assertEquals( "Accumulated total count should be 2000", 2000, accumulatedHistogram.getTotalCount()); System.out.println("Pausing detector threads for 5 seconds:"); pauseDetector.stallDetectorThreads(0x7, 5000 * MSEC); TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate // @ 15 sec from start // Report without forcing interval measurement update: System.out.println("@ " + (TimeServices.nanoTime() - startTime) + " nsec after start:"); System.out.println( "Estimated interval = " + latencyStats.getIntervalEstimator().getEstimatedInterval(TimeServices.nanoTime())); accumulatedHistogram = latencyStats.getAccumulatedHistogram(); intervalHistogram = latencyStats.getIntervalHistogram(); System.out.println( "Post-pause, pre-observation Accumulated Average latency for 5msec sleeps: " + accumulatedHistogram.getMean() + ", count = " + accumulatedHistogram.getTotalCount()); System.out.println( "Post-pause, pre-observation Interval Average latency for 5msec sleeps: " + intervalHistogram.getMean() + ", count = " + intervalHistogram.getTotalCount()); Assert.assertEquals( "Accumulated total count should be 2000", 2000, accumulatedHistogram.getTotalCount()); // Still @ 15 sec from start System.out.println("\nForcing Interval Update:\n"); latencyStats.forceIntervalSample(); System.out.println("@ " + (TimeServices.nanoTime() - startTime) + " nsec after start:"); System.out.println( "Estimated interval = " + latencyStats.getIntervalEstimator().getEstimatedInterval(TimeServices.nanoTime())); accumulatedHistogram = latencyStats.getAccumulatedHistogram(); intervalHistogram = latencyStats.getIntervalHistogram(); System.out.println( "Post-pause, post-observation Accumulated Average latency for 5msec sleeps: " + accumulatedHistogram.getMean() + ", count = " + accumulatedHistogram.getTotalCount()); System.out.println( "Post-pause, post-observation Interval Average latency for 5msec sleeps: " + intervalHistogram.getMean() + ", count = " + intervalHistogram.getTotalCount()); Assert.assertEquals( "Accumulated total count should be 2000", 2998, accumulatedHistogram.getTotalCount()); pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + (500 * MSEC)); TimeServices.moveTimeForwardMsec(500); TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate // @ 15.5 sec from start System.out.println("\nForcing Interval Update:\n"); latencyStats.forceIntervalSample(); System.out.println("@ " + (TimeServices.nanoTime() - startTime) + " nsec after start:"); System.out.println( "Estimated interval = " + latencyStats.getIntervalEstimator().getEstimatedInterval(TimeServices.nanoTime())); accumulatedHistogram = latencyStats.getAccumulatedHistogram(); intervalHistogram = latencyStats.getIntervalHistogram(); System.out.println( "Post-pause Accumulated Average latency for 5msec sleeps: " + accumulatedHistogram.getMean() + ", count = " + accumulatedHistogram.getTotalCount()); System.out.println( "Post-pause Interval Average latency for 5msec sleeps: " + intervalHistogram.getMean() + ", count = " + intervalHistogram.getTotalCount()); Assert.assertEquals( "Accumulated total count should be 2000", 2998, accumulatedHistogram.getTotalCount()); pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + (500 * MSEC)); TimeServices.moveTimeForwardMsec(500); TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate // @ 16 sec from start System.out.println("\nForcing Interval Update:\n"); latencyStats.forceIntervalSample(); System.out.println("@ " + (TimeServices.nanoTime() - startTime) + " nsec after start:"); System.out.println( "Estimated interval = " + latencyStats.getIntervalEstimator().getEstimatedInterval(TimeServices.nanoTime())); accumulatedHistogram = latencyStats.getAccumulatedHistogram(); intervalHistogram = latencyStats.getIntervalHistogram(); System.out.println( "Post-pause Accumulated Average latency for 5msec sleeps: " + accumulatedHistogram.getMean() + ", count = " + accumulatedHistogram.getTotalCount()); System.out.println( "Post-pause Interval Average latency for 5msec sleeps: " + intervalHistogram.getMean() + ", count = " + intervalHistogram.getTotalCount()); Assert.assertEquals( "Accumulated total count should be 2999", 2998, accumulatedHistogram.getTotalCount()); pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + (2000 * MSEC)); TimeServices.moveTimeForwardMsec(2000); TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate // @ 18 sec from start System.out.println("\nForcing Interval Update:\n"); latencyStats.forceIntervalSample(); System.out.println("@ " + (TimeServices.nanoTime() - startTime) + " nsec after start:"); System.out.println( "Estimated interval = " + latencyStats.getIntervalEstimator().getEstimatedInterval(TimeServices.nanoTime())); accumulatedHistogram = latencyStats.getAccumulatedHistogram(); intervalHistogram = latencyStats.getIntervalHistogram(); System.out.println( "Post-pause Accumulated Average latency for 5msec sleeps: " + accumulatedHistogram.getMean() + ", count = " + accumulatedHistogram.getTotalCount()); System.out.println( "Post-pause Interval Average latency for 5msec sleeps: " + intervalHistogram.getMean() + ", count = " + intervalHistogram.getTotalCount()); Assert.assertEquals( "Accumulated total count should be 2999", 2998, accumulatedHistogram.getTotalCount()); for (int i = 0; i < 100; i++) { pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + (5 * MSEC)); TimeServices.moveTimeForwardMsec(5); long now = TimeServices.nanoTime(); latencyStats.recordLatency(now - lastTime); lastTime = now; } pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + (500 * MSEC)); TimeServices.moveTimeForwardMsec(500); TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate // @ 19 sec from start System.out.println("\nForcing Interval Update:\n"); latencyStats.forceIntervalSample(); System.out.println("@ " + (TimeServices.nanoTime() - startTime) + " nsec after start:"); System.out.println( "Estimated interval = " + latencyStats.getIntervalEstimator().getEstimatedInterval(TimeServices.nanoTime())); accumulatedHistogram = latencyStats.getAccumulatedHistogram(); intervalHistogram = latencyStats.getIntervalHistogram(); System.out.println( "Post-pause Accumulated Average latency for 5msec sleeps: " + accumulatedHistogram.getMean() + ", count = " + accumulatedHistogram.getTotalCount()); System.out.println( "Post-pause Interval Average latency for 5msec sleeps: " + intervalHistogram.getMean() + ", count = " + intervalHistogram.getTotalCount()); pauseDetector.skipConsensusTimeTo(TimeServices.nanoTime() + (500 * MSEC)); TimeServices.moveTimeForwardMsec(500); TimeUnit.NANOSECONDS.sleep(1 * MSEC); // Make sure things have some time to propagate // @ 19.5 sec from start System.out.println("\nForcing Interval Update:\n"); latencyStats.forceIntervalSample(); System.out.println("@ " + (TimeServices.nanoTime() - startTime) + " nsec after start:"); System.out.println( "Estimated interval = " + latencyStats.getIntervalEstimator().getEstimatedInterval(TimeServices.nanoTime())); accumulatedHistogram = latencyStats.getAccumulatedHistogram(); intervalHistogram = latencyStats.getIntervalHistogram(); System.out.println( "Post-pause Accumulated Average latency for 5msec sleeps: " + accumulatedHistogram.getMean() + ", count = " + accumulatedHistogram.getTotalCount()); System.out.println( "Post-pause Interval Average latency for 5msec sleeps: " + intervalHistogram.getMean() + ", count = " + intervalHistogram.getTotalCount()); Assert.assertEquals( "Accumulated total count should be 2000", 3098, accumulatedHistogram.getTotalCount()); } catch (InterruptedException ex) { } latencyStats.stop(); pauseDetector.shutdown(); }