/** @param log Logger. */ private void execute(GridLogger log) { try { log.info("Started execute."); // Countdown shared job latch so that the main thread know that all jobs are // inside the "execute" routine. jobLatch.countDown(); log.info("After job latch."); // Await for the main thread to allow jobs to proceed. latch.await(); log.info("After latch."); if (awaitMasterLeaveCallback) { latch0.await(); log.info("After latch0."); } else log.info("Latch 0 skipped."); } catch (InterruptedException e) { // We do not expect any interruptions here, hence this statement. fail("Unexpected exception: " + e); } }
/** @throws Exception If failed. */ @SuppressWarnings("unchecked") public void testCancel() throws Exception { Grid grid = G.grid(getTestGridName()); grid.compute() .localDeployTask(GridCancelTestTask.class, U.detectClassLoader(GridCancelTestTask.class)); GridComputeTaskFuture<?> fut = grid.compute().execute(GridCancelTestTask.class.getName(), null); // Wait until jobs begin execution. boolean await = startSignal.await(WAIT_TIME, TimeUnit.MILLISECONDS); assert await : "Jobs did not start."; info("Test task result: " + fut); assert fut != null; // Only first job should successfully complete. Object res = fut.get(); assert (Integer) res == 1; // Wait for all jobs to finish. await = stopSignal.await(WAIT_TIME, TimeUnit.MILLISECONDS); assert await : "Jobs did not stop."; // One is definitely processed. But there might be some more processed or cancelled or processed // and cancelled. // Thus total number should be at least SPLIT_COUNT and at most (SPLIT_COUNT - 1) *2 +1 assert (cancelCnt + processedCnt) >= SPLIT_COUNT && (cancelCnt + processedCnt) <= (SPLIT_COUNT - 1) * 2 + 1 : "Invalid cancel count value: " + cancelCnt; }
/** * @param expJobs Expected jobs number. * @param taskStarter Task started. * @throws Exception If failed. */ private void testMasterLeaveAwareCallback( int expJobs, GridClosure<GridProjection, GridFuture<?>> taskStarter) throws Exception { jobLatch = new CountDownLatch(expJobs); invokeLatch = new CountDownLatch(expJobs); for (int i = 0; i < GRID_CNT; i++) startGrid(i); int lastGridIdx = GRID_CNT - 1; GridFuture<?> fut = taskStarter.apply(grid(lastGridIdx).forPredicate(excludeLastPredicate())); jobLatch.await(); stopGrid(lastGridIdx, true); latch.countDown(); assert invokeLatch.await(5000, MILLISECONDS); try { fut.get(); } catch (GridException e) { log.debug("Task failed: " + e); } }
/** * Ensure that {@link GridComputeJobMasterLeaveAware} callback is invoked when fails to send * {@link GridJobExecuteResponse} to master node. * * @throws Exception If failed. */ public void testCannotSendJobExecuteResponse() throws Exception { awaitMasterLeaveCallback = false; // Start grids. for (int i = 0; i < GRID_CNT; i++) startGrid(i); int lastGridIdx = GRID_CNT - 1; grid(lastGridIdx) .forPredicate(excludeLastPredicate()) .compute() .execute(new TestTask(GRID_CNT - 1), null); jobLatch.await(); for (int i = 0; i < lastGridIdx; i++) ((CommunicationSpi) grid(i).configuration().getCommunicationSpi()).waitLatch(); latch.countDown(); // Ensure that all worker nodes has already started job response sending. for (int i = 0; i < lastGridIdx; i++) ((CommunicationSpi) grid(i).configuration().getCommunicationSpi()).awaitResponse(); // Now we stop master grid. stopGrid(lastGridIdx, true); // Release communication SPI wait latches. As master node is stopped, job worker will receive // and exception. for (int i = 0; i < lastGridIdx; i++) ((CommunicationSpi) grid(i).configuration().getCommunicationSpi()).releaseWaitLatch(); assert invokeLatch.await(5000, MILLISECONDS); }
/** * Ensure that {@link GridComputeJobMasterLeaveAware} callback is invoked on job which is * initiated by master and is currently running on it. * * @throws Exception If failed. */ public void testLocalJobOnMaster() throws Exception { invokeLatch = new CountDownLatch(1); jobLatch = new CountDownLatch(1); Grid g = startGrid(0); g.compute().execute(new TestTask(1), null); jobLatch.await(); // Count down the latch in a separate thread. new Thread( new Runnable() { @Override public void run() { try { U.sleep(500); } catch (GridInterruptedException ignore) { // No-op. } latch.countDown(); } }) .start(); stopGrid(0, true); latch.countDown(); assert invokeLatch.await(5000, MILLISECONDS); }
/** @throws Exception If failed. */ public void testTopologyListener() throws Exception { final Collection<UUID> added = new ArrayList<>(1); final Collection<UUID> rmvd = new ArrayList<>(1); final CountDownLatch addedLatch = new CountDownLatch(1); final CountDownLatch rmvLatch = new CountDownLatch(1); assertEquals(NODES_CNT, client.compute().refreshTopology(false, false).size()); GridClientTopologyListener lsnr = new GridClientTopologyListener() { @Override public void onNodeAdded(GridClientNode node) { added.add(node.nodeId()); addedLatch.countDown(); } @Override public void onNodeRemoved(GridClientNode node) { rmvd.add(node.nodeId()); rmvLatch.countDown(); } }; client.addTopologyListener(lsnr); try { Grid g = startGrid(NODES_CNT + 1); UUID id = g.localNode().id(); assertTrue(addedLatch.await(2 * TOP_REFRESH_FREQ, MILLISECONDS)); assertEquals(1, added.size()); assertEquals(id, F.first(added)); stopGrid(NODES_CNT + 1); assertTrue(rmvLatch.await(2 * TOP_REFRESH_FREQ, MILLISECONDS)); assertEquals(1, rmvd.size()); assertEquals(id, F.first(rmvd)); } finally { client.removeTopologyListener(lsnr); stopGrid(NODES_CNT + 1); } }
/** {@inheritDoc} */ @Override public Serializable execute() { int arg = this.<Integer>argument(0); try { if (log.isInfoEnabled()) log.info("Executing job [job=" + this + ", arg=" + arg + ']'); startSignal.countDown(); try { if (!startSignal.await(WAIT_TIME, TimeUnit.MILLISECONDS)) fail(); if (arg == 1) { if (log.isInfoEnabled()) log.info("Job one is proceeding."); } else Thread.sleep(WAIT_TIME); } catch (InterruptedException e) { if (log.isInfoEnabled()) log.info("Job got cancelled [arg=" + arg + ", ses=" + ses + ", e=" + e + ']'); return 0; } if (log.isInfoEnabled()) log.info("Completing job: " + ses); return argument(0); } finally { stopSignal.countDown(); processedCnt++; } }
/** * Ensure that {@link GridComputeJobMasterLeaveAware} callback is invoked when master node leaves * topology normally. * * @throws Exception If failed. */ public void testMasterStoppedNormally() throws Exception { // Start grids. for (int i = 0; i < GRID_CNT; i++) startGrid(i); int lastGridIdx = GRID_CNT - 1; grid(lastGridIdx) .forPredicate(excludeLastPredicate()) .compute() .execute(new TestTask(GRID_CNT - 1), null); jobLatch.await(); stopGrid(lastGridIdx, true); latch.countDown(); assert invokeLatch.await(5000, MILLISECONDS); }
/** * Ensure that {@link GridComputeJobMasterLeaveAware} callback is invoked when master node leaves * topology abruptly (e.g. due to a network failure or immediate node shutdown). * * @throws Exception If failed. */ public void testMasterStoppedAbruptly() throws Exception { // Start grids. for (int i = 0; i < GRID_CNT; i++) startGrid(i); int lastGridIdx = GRID_CNT - 1; grid(lastGridIdx) .forPredicate(excludeLastPredicate()) .compute() .execute(new TestTask(GRID_CNT - 1), null); jobLatch.await(); ((CommunicationSpi) grid(lastGridIdx).configuration().getCommunicationSpi()).blockMessages(); stopGrid(lastGridIdx, true); latch.countDown(); assert invokeLatch.await(5000, MILLISECONDS); }