/**
 * Builds a node heartbeat request and sends it through {@code resourceTracker}.
 *
 * <p>The request carries this node's id, the given response id, the container statuses of
 * <em>all</em> applications in {@code conts}, and a health status with an empty report and a
 * fixed report time of {@code 1}. The last known container-token and NM-token master keys are
 * attached to the request. After the call, each cached master key is replaced by the one in the
 * response when the response supplies a key whose id differs from the cached one.
 *
 * @param conts container statuses grouped by application; every non-null list is included in
 *     the heartbeat
 * @param isHealthy health flag to report for this node
 * @param resId response id to place in the node status
 * @return the response returned by {@code resourceTracker.nodeHeartbeat}
 * @throws Exception if building or sending the heartbeat fails
 */
public NodeHeartbeatResponse nodeHeartbeat(
    Map<ApplicationId, List<ContainerStatus>> conts, boolean isHealthy, int resId)
    throws Exception {
  NodeHeartbeatRequest req = Records.newRecord(NodeHeartbeatRequest.class);
  NodeStatus status = Records.newRecord(NodeStatus.class);
  status.setResponseId(resId);
  status.setNodeId(nodeId);

  // Collect the statuses of every application into one list. Calling
  // setContainersStatuses() once per entry would overwrite the previous value and only the
  // last application's containers would be reported.
  List<ContainerStatus> allStatuses = new ArrayList<ContainerStatus>();
  for (Map.Entry<ApplicationId, List<ContainerStatus>> entry : conts.entrySet()) {
    Log.info("entry.getValue() " + entry.getValue());
    if (entry.getValue() != null) {
      allStatuses.addAll(entry.getValue());
    }
  }
  status.setContainersStatuses(allStatuses);

  NodeHealthStatus healthStatus = Records.newRecord(NodeHealthStatus.class);
  healthStatus.setHealthReport("");
  healthStatus.setIsNodeHealthy(isHealthy);
  healthStatus.setLastHealthReportTime(1);
  status.setNodeHealthStatus(healthStatus);

  req.setNodeStatus(status);
  req.setLastKnownContainerTokenMasterKey(this.currentContainerTokenMasterKey);
  req.setLastKnownNMTokenMasterKey(this.currentNMTokenMasterKey);

  NodeHeartbeatResponse heartbeatResponse = resourceTracker.nodeHeartbeat(req);

  // Adopt the container-token master key from the response if its id differs from ours.
  MasterKey masterKeyFromRM = heartbeatResponse.getContainerTokenMasterKey();
  if (masterKeyFromRM != null
      && masterKeyFromRM.getKeyId() != this.currentContainerTokenMasterKey.getKeyId()) {
    this.currentContainerTokenMasterKey = masterKeyFromRM;
  }

  // Same check for the NM-token master key.
  masterKeyFromRM = heartbeatResponse.getNMTokenMasterKey();
  if (masterKeyFromRM != null
      && masterKeyFromRM.getKeyId() != this.currentNMTokenMasterKey.getKeyId()) {
    this.currentNMTokenMasterKey = masterKeyFromRM;
  }

  return heartbeatResponse;
}
/**
 * Copies the given health values into {@code healthStatus}.
 *
 * @param healthStatus record to update in place
 * @param isHealthy value stored as the node-healthy flag
 * @param healthReport value stored as the health report
 * @param lastHealthReportTime value stored as the last health report time
 */
private void setHealthStatus(
    NodeHealthStatus healthStatus,
    boolean isHealthy,
    String healthReport,
    long lastHealthReportTime) {
  // Report text first, then the flag, then the timestamp.
  healthStatus.setHealthReport(healthReport);
  healthStatus.setIsNodeHealthy(isHealthy);
  healthStatus.setLastHealthReportTime(lastHealthReportTime);
}
/**
 * Returns a node that has been driven into the {@code UNHEALTHY} state.
 *
 * <p>Starts from the node returned by {@code getRunningNode()}, delivers a status event whose
 * health status is not healthy with the report {@code "sick"} and an empty container status
 * list, and asserts that the node's state is {@link NodeState#UNHEALTHY} afterwards.
 *
 * @return the node after it has handled the unhealthy status event
 * @throws IOException declared for callers; thrown only if obtaining the running node fails
 */
private RMNodeImpl getUnhealthyNode() throws IOException {
  RMNodeImpl runningNode = getRunningNode();

  NodeHealthStatus sickStatus =
      NodeHealthStatus.newInstance(false, "sick", System.currentTimeMillis());

  RMNodeStatusEvent sickEvent =
      new RMNodeStatusEvent(
          runningNode.getNodeID(),
          sickStatus,
          new ArrayList<ContainerStatus>(),
          null,
          null,
          new TransactionStateImpl(TransactionType.RM));
  runningNode.handle(sickEvent);

  Assert.assertEquals(NodeState.UNHEALTHY, runningNode.getState());
  return runningNode;
}
@Test public void testNodeHealthScript() throws Exception { RecordFactory factory = RecordFactoryProvider.getRecordFactory(null); NodeHealthStatus healthStatus = factory.newRecordInstance(NodeHealthStatus.class); String errorScript = "echo ERROR\n echo \"Tracker not healthy\""; String normalScript = "echo \"I am all fine\""; String timeOutScript = Shell.WINDOWS ? "@echo off\nping -n 4 127.0.0.1 >nul\necho \"I am fine\"" : "sleep 4\necho \"I am fine\""; Configuration conf = getConfForNodeHealthScript(); conf.writeXml(new FileOutputStream(nodeHealthConfigFile)); conf.addResource(nodeHealthConfigFile.getName()); writeNodeHealthScriptFile(normalScript, true); NodeHealthCheckerService nodeHealthChecker = new NodeHealthCheckerService(); nodeHealthChecker.init(conf); NodeHealthScriptRunner nodeHealthScriptRunner = nodeHealthChecker.getNodeHealthScriptRunner(); TimerTask timerTask = nodeHealthScriptRunner.getTimerTask(); timerTask.run(); setHealthStatus( healthStatus, nodeHealthChecker.isHealthy(), nodeHealthChecker.getHealthReport(), nodeHealthChecker.getLastHealthReportTime()); LOG.info("Checking initial healthy condition"); // Check proper report conditions. Assert.assertTrue("Node health status reported unhealthy", healthStatus.getIsNodeHealthy()); Assert.assertTrue( "Node health status reported unhealthy", healthStatus.getHealthReport().equals(nodeHealthChecker.getHealthReport())); // write out error file. 
// Healthy to unhealthy transition writeNodeHealthScriptFile(errorScript, true); // Run timer timerTask.run(); // update health status setHealthStatus( healthStatus, nodeHealthChecker.isHealthy(), nodeHealthChecker.getHealthReport(), nodeHealthChecker.getLastHealthReportTime()); LOG.info("Checking Healthy--->Unhealthy"); Assert.assertFalse("Node health status reported healthy", healthStatus.getIsNodeHealthy()); Assert.assertTrue( "Node health status reported healthy", healthStatus.getHealthReport().equals(nodeHealthChecker.getHealthReport())); // Check unhealthy to healthy transitions. writeNodeHealthScriptFile(normalScript, true); timerTask.run(); setHealthStatus( healthStatus, nodeHealthChecker.isHealthy(), nodeHealthChecker.getHealthReport(), nodeHealthChecker.getLastHealthReportTime()); LOG.info("Checking UnHealthy--->healthy"); // Check proper report conditions. Assert.assertTrue("Node health status reported unhealthy", healthStatus.getIsNodeHealthy()); Assert.assertTrue( "Node health status reported unhealthy", healthStatus.getHealthReport().equals(nodeHealthChecker.getHealthReport())); // Healthy to timeout transition. writeNodeHealthScriptFile(timeOutScript, true); timerTask.run(); setHealthStatus( healthStatus, nodeHealthChecker.isHealthy(), nodeHealthChecker.getHealthReport(), nodeHealthChecker.getLastHealthReportTime()); LOG.info("Checking Healthy--->timeout"); Assert.assertFalse( "Node health status reported healthy even after timeout", healthStatus.getIsNodeHealthy()); Assert.assertTrue( "Node script time out message not propogated", healthStatus .getHealthReport() .equals( NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG + NodeHealthCheckerService.SEPARATOR + nodeHealthChecker.getDiskHandler().getDisksHealthReport())); }