@Test public void testNodeHealthScript() throws Exception { RecordFactory factory = RecordFactoryProvider.getRecordFactory(null); NodeHealthStatus healthStatus = factory.newRecordInstance(NodeHealthStatus.class); String errorScript = "echo ERROR\n echo \"Tracker not healthy\""; String normalScript = "echo \"I am all fine\""; String timeOutScript = Shell.WINDOWS ? "@echo off\nping -n 4 127.0.0.1 >nul\necho \"I am fine\"" : "sleep 4\necho \"I am fine\""; Configuration conf = getConfForNodeHealthScript(); conf.writeXml(new FileOutputStream(nodeHealthConfigFile)); conf.addResource(nodeHealthConfigFile.getName()); writeNodeHealthScriptFile(normalScript, true); NodeHealthCheckerService nodeHealthChecker = new NodeHealthCheckerService(); nodeHealthChecker.init(conf); NodeHealthScriptRunner nodeHealthScriptRunner = nodeHealthChecker.getNodeHealthScriptRunner(); TimerTask timerTask = nodeHealthScriptRunner.getTimerTask(); timerTask.run(); setHealthStatus( healthStatus, nodeHealthChecker.isHealthy(), nodeHealthChecker.getHealthReport(), nodeHealthChecker.getLastHealthReportTime()); LOG.info("Checking initial healthy condition"); // Check proper report conditions. Assert.assertTrue("Node health status reported unhealthy", healthStatus.getIsNodeHealthy()); Assert.assertTrue( "Node health status reported unhealthy", healthStatus.getHealthReport().equals(nodeHealthChecker.getHealthReport())); // write out error file. 
// Healthy to unhealthy transition writeNodeHealthScriptFile(errorScript, true); // Run timer timerTask.run(); // update health status setHealthStatus( healthStatus, nodeHealthChecker.isHealthy(), nodeHealthChecker.getHealthReport(), nodeHealthChecker.getLastHealthReportTime()); LOG.info("Checking Healthy--->Unhealthy"); Assert.assertFalse("Node health status reported healthy", healthStatus.getIsNodeHealthy()); Assert.assertTrue( "Node health status reported healthy", healthStatus.getHealthReport().equals(nodeHealthChecker.getHealthReport())); // Check unhealthy to healthy transitions. writeNodeHealthScriptFile(normalScript, true); timerTask.run(); setHealthStatus( healthStatus, nodeHealthChecker.isHealthy(), nodeHealthChecker.getHealthReport(), nodeHealthChecker.getLastHealthReportTime()); LOG.info("Checking UnHealthy--->healthy"); // Check proper report conditions. Assert.assertTrue("Node health status reported unhealthy", healthStatus.getIsNodeHealthy()); Assert.assertTrue( "Node health status reported unhealthy", healthStatus.getHealthReport().equals(nodeHealthChecker.getHealthReport())); // Healthy to timeout transition. writeNodeHealthScriptFile(timeOutScript, true); timerTask.run(); setHealthStatus( healthStatus, nodeHealthChecker.isHealthy(), nodeHealthChecker.getHealthReport(), nodeHealthChecker.getLastHealthReportTime()); LOG.info("Checking Healthy--->timeout"); Assert.assertFalse( "Node health status reported healthy even after timeout", healthStatus.getIsNodeHealthy()); Assert.assertTrue( "Node script time out message not propogated", healthStatus .getHealthReport() .equals( NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG + NodeHealthCheckerService.SEPARATOR + nodeHealthChecker.getDiskHandler().getDisksHealthReport())); }
/**
 * Starts one container through a {@link DummyContainerManager}, waits until it is reported as
 * {@code RUNNING}, stops it, and waits until it is reported as {@code COMPLETE}.
 *
 * <p>The node status updater is an anonymous subclass that talks to a {@link LocalRMInterface}
 * and starts no updater thread.
 *
 * @throws InterruptedException declared by the original test; not thrown directly here
 * @throws IOException if the local file system or the container manager calls fail
 * @throws YarnException if starting or stopping the container is rejected
 */
@Test
public void testSuccessfulContainerLaunch()
    throws InterruptedException, IOException, YarnException {
  // Wipe the three working directories, then recreate them empty.
  FileContext fs = FileContext.getLocalFSFileContext();
  String localDirPath = localDir.getAbsolutePath();
  String localLogDirPath = localLogDir.getAbsolutePath();
  String remoteLogDirPath = remoteLogDir.getAbsolutePath();
  fs.delete(new Path(localDirPath), true);
  fs.delete(new Path(localLogDirPath), true);
  fs.delete(new Path(remoteLogDirPath), true);
  localDir.mkdir();
  localLogDir.mkdir();
  remoteLogDir.mkdir();

  YarnConfiguration conf = new YarnConfiguration();
  // The context is built before the directory settings are applied, as in the original flow.
  Context context =
      new NMContext(
          new NMContainerTokenSecretManager(conf),
          new NMTokenSecretManagerInNM(),
          null,
          null,
          new NMNullStateStoreService()) {
        @Override
        public int getHttpPort() {
          return 1234;
        }
      };
  conf.set(YarnConfiguration.NM_LOCAL_DIRS, localDirPath);
  conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDirPath);
  conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogDirPath);

  ContainerExecutor executor = new DefaultContainerExecutor();
  executor.setConf(conf);
  DeletionService deletionService = new DeletionService(executor);
  Dispatcher dispatcher = new AsyncDispatcher();
  LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
  NodeHealthCheckerService healthChecker =
      new NodeHealthCheckerService(NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
  healthChecker.init(conf);
  NodeManagerMetrics metrics = NodeManagerMetrics.create();

  NodeStatusUpdater statusUpdater =
      new NodeStatusUpdaterImpl(context, dispatcher, healthChecker, metrics) {
        @Override
        protected ResourceTracker getRMClient() {
          return new LocalRMInterface();
        }

        @Override
        protected void stopRMProxy() {
          // Nothing to stop.
        }

        @Override
        protected void startStatusUpdater() {
          // Don't start any updating thread.
        }

        @Override
        public long getRMIdentifier() {
          return SIMULATED_RM_IDENTIFIER;
        }
      };

  DummyContainerManager containerManager =
      new DummyContainerManager(
          context,
          executor,
          deletionService,
          statusUpdater,
          metrics,
          new ApplicationACLsManager(conf),
          dirsHandler);
  statusUpdater.init(conf);
  ((NMContext) context).setContainerManager(containerManager);
  statusUpdater.start();
  containerManager.init(conf);
  containerManager.start();

  // Build the identifiers and the start request for a single container.
  ContainerLaunchContext launchContext =
      recordFactory.newRecordInstance(ContainerLaunchContext.class);
  ApplicationId appId = ApplicationId.newInstance(0, 0);
  ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 0);
  ContainerId cID = ContainerId.newContainerId(attemptId, 0);
  String user = "******";

  List<StartContainerRequest> startRequests = new ArrayList<StartContainerRequest>();
  startRequests.add(
      StartContainerRequest.newInstance(
          launchContext,
          TestContainerManager.createContainerToken(
              cID,
              SIMULATED_RM_IDENTIFIER,
              context.getNodeId(),
              user,
              context.getContainerTokenSecretManager())));
  containerManager.startContainers(StartContainersRequest.newInstance(startRequests));
  BaseContainerManagerTest.waitForContainerState(containerManager, cID, ContainerState.RUNNING);

  // Stop the same container and wait for it to finish.
  List<ContainerId> idsToStop = new ArrayList<ContainerId>();
  idsToStop.add(cID);
  containerManager.stopContainers(StopContainersRequest.newInstance(idsToStop));
  BaseContainerManagerTest.waitForContainerState(containerManager, cID, ContainerState.COMPLETE);

  containerManager.stop();
}
/**
 * Starts one container through a {@link DummyContainerManager}, waits until it is reported as
 * {@code RUNNING}, stops it, and waits until it is reported as {@code COMPLETE}.
 *
 * <p>The node status updater is an anonymous subclass that talks to a {@link LocalRMInterface}
 * and starts no updater thread; it is only handed to the container manager and is never
 * initialised or started by this test.
 *
 * @throws InterruptedException declared by the original test; not thrown directly here
 * @throws IOException if the local file system or the container manager calls fail
 */
@Test
public void testSuccessfulContainerLaunch() throws InterruptedException, IOException {
  // Wipe the three working directories, then recreate them empty.
  FileContext fs = FileContext.getLocalFSFileContext();
  String localDirPath = localDir.getAbsolutePath();
  String localLogDirPath = localLogDir.getAbsolutePath();
  String remoteLogDirPath = remoteLogDir.getAbsolutePath();
  fs.delete(new Path(localDirPath), true);
  fs.delete(new Path(localLogDirPath), true);
  fs.delete(new Path(remoteLogDirPath), true);
  localDir.mkdir();
  localLogDir.mkdir();
  remoteLogDir.mkdir();

  Context context = new NMContext();
  YarnConfiguration conf = new YarnConfiguration();
  conf.set(YarnConfiguration.NM_LOCAL_DIRS, localDirPath);
  conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDirPath);
  conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogDirPath);

  ContainerExecutor executor = new DefaultContainerExecutor();
  executor.setConf(conf);
  DeletionService deletionService = new DeletionService(executor);
  Dispatcher dispatcher = new AsyncDispatcher();

  // The disk handler comes from the health checker after it has been initialised.
  NodeHealthCheckerService healthChecker = new NodeHealthCheckerService();
  healthChecker.init(conf);
  LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();

  NodeManagerMetrics metrics = NodeManagerMetrics.create();
  ContainerTokenSecretManager tokenSecretManager = new ContainerTokenSecretManager();
  NodeStatusUpdater statusUpdater =
      new NodeStatusUpdaterImpl(
          context, dispatcher, healthChecker, metrics, tokenSecretManager) {
        @Override
        protected ResourceTracker getRMClient() {
          return new LocalRMInterface();
        }

        @Override
        protected void startStatusUpdater() {
          // Don't start any updating thread.
        }
      };

  DummyContainerManager containerManager =
      new DummyContainerManager(
          context,
          executor,
          deletionService,
          statusUpdater,
          metrics,
          tokenSecretManager,
          new ApplicationACLsManager(conf),
          dirsHandler);
  containerManager.init(conf);
  containerManager.start();

  // Build the id chain bottom-up: application -> attempt -> container.
  ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class);
  appId.setClusterTimestamp(0);
  appId.setId(0);

  ApplicationAttemptId attemptId = recordFactory.newRecordInstance(ApplicationAttemptId.class);
  attemptId.setApplicationId(appId);
  attemptId.setAttemptId(0);

  ContainerId cID = recordFactory.newRecordInstance(ContainerId.class);
  cID.setApplicationAttemptId(attemptId);

  ContainerLaunchContext launchContext =
      recordFactory.newRecordInstance(ContainerLaunchContext.class);
  launchContext.setContainerId(cID);
  launchContext.setUser("testing");
  launchContext.setResource(recordFactory.newRecordInstance(Resource.class));

  // Start the container and wait for it to come up.
  StartContainerRequest startRequest =
      recordFactory.newRecordInstance(StartContainerRequest.class);
  startRequest.setContainerLaunchContext(launchContext);
  containerManager.startContainer(startRequest);
  BaseContainerManagerTest.waitForContainerState(containerManager, cID, ContainerState.RUNNING);

  // Stop the same container and wait for it to finish.
  StopContainerRequest stopRequest = recordFactory.newRecordInstance(StopContainerRequest.class);
  stopRequest.setContainerId(cID);
  containerManager.stopContainer(stopRequest);
  BaseContainerManagerTest.waitForContainerState(containerManager, cID, ContainerState.COMPLETE);

  containerManager.stop();
}