/**
 * Verifies that {@code AppDataPushAgent} publishes application data to the gateway address
 * configured on the DAG.
 *
 * <p>An embedded WebSocket server records every text message it receives. After one call to
 * {@code pushData()} the first recorded message must be a {@code publish} message on the
 * configured topic whose {@code data} object carries the application identity fields and the
 * per-logical-operator counters for {@code o1} and {@code o2}.
 *
 * @throws Exception if the server, the agent or the JSON parsing fails
 */
@Test
public void testAppDataPush() throws Exception {
  final String topic = "xyz";
  // The list is filled from the WebSocket callback and read by the test thread (presumably two
  // different threads - confirm against EmbeddedWebSocketServer), so use a synchronized list.
  final List<String> messages =
      java.util.Collections.synchronizedList(new ArrayList<String>());
  EmbeddedWebSocketServer server = new EmbeddedWebSocketServer(0);
  server.setWebSocket(
      new WebSocket.OnTextMessage() {
        @Override
        public void onMessage(String data) {
          messages.add(data);
        }

        @Override
        public void onOpen(WebSocket.Connection connection) {}

        @Override
        public void onClose(int closeCode, String message) {}
      });
  try {
    server.start();
    int port = server.getPort();

    TestGeneratorInputOperator o1 = dag.addOperator("o1", TestGeneratorInputOperator.class);
    GenericTestOperator o2 = dag.addOperator("o2", GenericTestOperator.class);
    dag.addStream("o1.outport", o1.outport, o2.inport1);
    dag.setAttribute(LogicalPlan.METRICS_TRANSPORT, new AutoMetricBuiltInTransport(topic));
    dag.setAttribute(LogicalPlan.GATEWAY_CONNECT_ADDRESS, "localhost:" + port);
    dag.setAttribute(LogicalPlan.PUBSUB_CONNECT_TIMEOUT_MILLIS, 2000);

    StramLocalCluster lc = new StramLocalCluster(dag);
    StreamingContainerManager dnmgr = lc.dnmgr;
    StramAppContext appContext = new StramTestSupport.TestAppContext(dag.getAttributes());
    AppDataPushAgent pushAgent = new AppDataPushAgent(dnmgr, appContext);
    pushAgent.init();
    try {
      pushAgent.pushData();
      // give the pushed message time to arrive at the embedded server
      Thread.sleep(1000);
      Assert.assertTrue(messages.size() > 0);
    } finally {
      // always release the agent, even when the assertion above fails
      pushAgent.close();
    }

    JSONObject message = new JSONObject(messages.get(0));
    Assert.assertEquals(topic, message.getString("topic"));
    Assert.assertEquals("publish", message.getString("type"));
    JSONObject data = message.getJSONObject("data");
    Assert.assertTrue(StringUtils.isNotBlank(data.getString("appId")));
    Assert.assertTrue(StringUtils.isNotBlank(data.getString("appUser")));
    Assert.assertTrue(StringUtils.isNotBlank(data.getString("appName")));

    JSONObject logicalOperators = data.getJSONObject("logicalOperators");
    for (String opName : new String[] {"o1", "o2"}) {
      JSONObject opObj = logicalOperators.getJSONObject(opName);
      Assert.assertTrue(opObj.has("totalTuplesProcessed"));
      Assert.assertTrue(opObj.has("totalTuplesEmitted"));
      Assert.assertTrue(opObj.has("tuplesProcessedPSMA"));
      Assert.assertTrue(opObj.has("tuplesEmittedPSMA"));
      Assert.assertTrue(opObj.has("latencyMA"));
    }
  } finally {
    server.stop();
  }
}
/**
 * Checks that the deploy info generated for a {@code ValidInputOperator} is typed as
 * {@code OperatorType.INPUT}.
 *
 * <p>Every container of the physical plan is assigned first; the deploy info is then read from
 * the agent of the container that hosts the operator's first physical instance.
 */
@Test
public void testValidInputOperatorDeployInfoType() {
  TestGeneratorInputOperator.ValidInputOperator input =
      dag.addOperator("o1", TestGeneratorInputOperator.ValidInputOperator.class);
  GenericTestOperator downstream = dag.addOperator("o2", GenericTestOperator.class);
  dag.addStream("stream1", input.outport, downstream.inport1);
  dag.setAttribute(OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());

  StreamingContainerManager manager = new StreamingContainerManager(dag);
  PhysicalPlan plan = manager.getPhysicalPlan();

  // containers are named container1..containerN
  List<PTContainer> planContainers = plan.getContainers();
  for (int seq = 1; seq <= planContainers.size(); seq++) {
    assignContainer(manager, "container" + seq);
  }

  PTOperator inputInstance = plan.getOperators(dag.getMeta(input)).get(0);
  String hostingContainerId = inputInstance.getContainer().getExternalId();
  OperatorDeployInfo deployInfo =
      getDeployInfo(manager.getContainerAgent(hostingContainerId)).get(0);

  Assert.assertEquals(
      "type " + deployInfo, OperatorDeployInfo.OperatorType.INPUT, deployInfo.type);
}
@Test public void testOperatorFailureRecovery() throws Exception { LogicalPlan dag = new LogicalPlan(); dag.setAttribute(LogicalPlan.APPLICATION_PATH, testMeta.toURI().toString()); FailingOperator badOperator = dag.addOperator("badOperator", FailingOperator.class); dag.getContextAttributes(badOperator).put(OperatorContext.RECOVERY_ATTEMPTS, 1); LOG.info("Initializing Client"); StramClient client = new StramClient(conf, dag); if (StringUtils.isBlank(System.getenv("JAVA_HOME"))) { client.javaCmd = "java"; // JAVA_HOME not set in the yarn mini cluster } try { client.start(); client.startApplication(); client.setClientTimeout(120000); boolean result = client.monitorApplication(); LOG.info("Client run completed. Result=" + result); Assert.assertFalse("should fail", result); ApplicationReport ar = client.getApplicationReport(); Assert.assertEquals( "should fail", FinalApplicationStatus.FAILED, ar.getFinalApplicationStatus()); // unable to get the diagnostics message set by the AM here - see YARN-208 // diagnostics message does not make it here even with Hadoop 2.2 (but works on standalone // cluster) // Assert.assertTrue("appReport " + ar, ar.getDiagnostics().contains("badOperator")); } finally { client.stop(); } }
/**
 * Builds and validates a logical plan for application {@code "testApp"} from the given
 * configuration.
 *
 * <p>Before returning, the plan is checked to carry a master memory of 128 and the custom log4j
 * container JVM option.
 *
 * @param lpc configuration used to populate the new plan
 * @return the prepared and validated plan
 * @throws Exception if preparing or validating the plan fails
 */
private LogicalPlan createDAG(LogicalPlanConfiguration lpc) throws Exception {
  LogicalPlan prepared = new LogicalPlan();
  String appPath = testMeta.toURI().toString();
  prepared.setAttribute(LogicalPlan.APPLICATION_PATH, appPath);
  lpc.prepareDAG(prepared, null, "testApp");
  prepared.validate();

  Object masterMemory = prepared.getValue(DAG.MASTER_MEMORY_MB);
  Assert.assertEquals("", Integer.valueOf(128), masterMemory);

  Object jvmOptions = prepared.getValue(DAG.CONTAINER_JVM_OPTIONS);
  Assert.assertEquals("", "-Dlog4j.properties=custom_log4j.properties", jvmOptions);

  return prepared;
}
/**
 * Connects a generator to a downstream operator that is split into two partitions by a
 * {@code StatelessPartitioner}, using the given stream locality.
 *
 * <p>The resulting physical plan must consist of a single container holding three operators;
 * the DAG is then executed in a local cluster via {@code run(5000)}.
 *
 * @param locality locality applied to the stream between the two operators
 * @throws Exception if validation, planning or the local run fails
 */
private void testDownStreamPartition(Locality locality) throws Exception {
  TestGeneratorInputOperator source = dag.addOperator("o1", TestGeneratorInputOperator.class);
  GenericTestOperator partitioned = dag.addOperator("o2", GenericTestOperator.class);
  dag.setAttribute(
      partitioned,
      OperatorContext.PARTITIONER,
      new StatelessPartitioner<GenericTestOperator>(2));
  dag.addStream("o1Output1", source.outport, partitioned.inport1).setLocality(locality);

  int containerLimit = 5;
  dag.setAttribute(LogicalPlan.CONTAINERS_MAX_COUNT, containerLimit);
  dag.setAttribute(OperatorContext.STORAGE_AGENT, new StramTestSupport.MemoryStorageAgent());
  dag.validate();

  PhysicalPlan physicalPlan = new PhysicalPlan(dag, new TestPlanContext());
  Assert.assertEquals("number of containers", 1, physicalPlan.getContainers().size());

  PTContainer onlyContainer = physicalPlan.getContainers().get(0);
  Assert.assertEquals(
      "number operators " + onlyContainer, 3, onlyContainer.getOperators().size());

  StramLocalCluster localCluster = new StramLocalCluster(dag);
  localCluster.run(5000);
}
/**
 * Serializes a physical plan with Java serialization, reads it back and verifies that the
 * partitioning of operator {@code o2} survives the round trip.
 *
 * <p>The plan context is not restored by deserialization here, so it is re-injected into the
 * private {@code ctx} field through reflection before the plan is used again.
 *
 * @param agent storage agent set as {@code STORAGE_AGENT} on the DAG
 * @throws Exception if serialization, deserialization or reflection fails
 */
private void testPhysicalPlanSerialization(StorageAgent agent) throws Exception {
  LogicalPlan dag = new LogicalPlan();

  GenericTestOperator o1 = dag.addOperator("o1", GenericTestOperator.class);
  PartitioningTestOperator o2 = dag.addOperator("o2", PartitioningTestOperator.class);
  o2.setPartitionCount(3);
  GenericTestOperator o3 = dag.addOperator("o3", GenericTestOperator.class);

  dag.addStream("o1.outport1", o1.outport1, o2.inport1, o2.inportWithCodec);
  dag.addStream("mergeStream", o2.outport1, o3.inport1);

  dag.getAttributes().put(LogicalPlan.CONTAINERS_MAX_COUNT, 2);

  TestPlanContext ctx = new TestPlanContext();
  dag.setAttribute(OperatorContext.STORAGE_AGENT, agent);
  PhysicalPlan plan = new PhysicalPlan(dag, ctx);

  ByteArrayOutputStream bos = new ByteArrayOutputStream();
  LogicalPlan.write(dag, bos);
  LOG.debug("logicalPlan size: " + bos.toByteArray().length);

  bos = new ByteArrayOutputStream();
  ObjectOutputStream oos = new ObjectOutputStream(bos);
  try {
    oos.writeObject(plan);
  } finally {
    // closing flushes everything the object stream may still buffer, so the byte array read
    // below is guaranteed to contain the complete serialized plan
    oos.close();
  }
  LOG.debug("physicalPlan size: " + bos.toByteArray().length);

  ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
  ObjectInputStream ois = new ObjectInputStream(bis);
  try {
    plan = (PhysicalPlan) ois.readObject();
  } finally {
    ois.close();
  }

  dag = plan.getLogicalPlan();

  // put the plan context back into the deserialized plan
  Field f = PhysicalPlan.class.getDeclaredField("ctx");
  f.setAccessible(true);
  f.set(plan, ctx);
  f.setAccessible(false);

  OperatorMeta o2Meta = dag.getOperatorMeta("o2");
  List<PTOperator> o2Partitions = plan.getOperators(o2Meta);
  assertEquals(3, o2Partitions.size());
  for (PTOperator o : o2Partitions) {
    Assert.assertNotNull("partition null " + o, o.getPartitionKeys());
    assertEquals(
        "partition keys " + o + " " + o.getPartitionKeys(), 2, o.getPartitionKeys().size());
    PartitioningTestOperator partitionedInstance =
        (PartitioningTestOperator) plan.loadOperator(o);
    assertEquals(
        "instance per partition",
        o.getPartitionKeys().values().toString(),
        partitionedInstance.pks);
    Assert.assertNotNull("partition stats null " + o, o.stats);
  }
}
/**
 * Runs a two-operator DAG (random number generator feeding a logger) in a local cluster via
 * {@code run(10000)}, with an {@code AsyncFSStorageAgent} writing below
 * {@code target/testOutofSequenceError}. The test has no explicit assertions; it passes when the
 * run completes without throwing.
 *
 * @throws Exception if the local cluster cannot be created or run
 */
@Test
public void testOutofSequenceError() throws Exception {
  logger.info("Testing Out of Sequence Error");

  String checkpointDir = new File("target/testOutofSequenceError").getAbsolutePath();
  LogicalPlan plan = new LogicalPlan();
  plan.setAttribute(
      Context.OperatorContext.STORAGE_AGENT, new AsyncFSStorageAgent(checkpointDir, null));

  RandomNumberGenerator generator = plan.addOperator("random", new RandomNumberGenerator());
  MyLogger sink = plan.addOperator("logger", new MyLogger());
  plan.addStream("stream", generator.output, sink.input);

  StramLocalCluster cluster = new StramLocalCluster(plan);
  cluster.run(10000);
}
/**
 * Checks operator states after a container restart in a three-operator chain deployed to two
 * containers.
 *
 * <p>The first container holds two operators, the second one. After a restart is scheduled for
 * the first container, its operators must still be {@code PENDING_DEPLOY}, exactly one container
 * start request must be queued, and the operator in the second container must be
 * {@code PENDING_UNDEPLOY}.
 *
 * @throws Exception if the container manager cannot be set up
 */
@Test
public void testRecoveryOrder() throws Exception {
  GenericTestOperator first = dag.addOperator("node1", GenericTestOperator.class);
  GenericTestOperator second = dag.addOperator("node2", GenericTestOperator.class);
  GenericTestOperator third = dag.addOperator("node3", GenericTestOperator.class);
  dag.addStream("n1n2", first.outport1, second.inport1);
  dag.addStream("n2n3", second.outport1, third.inport1);
  dag.getAttributes().put(LogicalPlan.CONTAINERS_MAX_COUNT, 2);
  dag.setAttribute(OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());

  StreamingContainerManager manager = new StreamingContainerManager(dag);
  Assert.assertEquals(
      "" + manager.containerStartRequests, 2, manager.containerStartRequests.size());
  // start from an empty request queue so the restart below can be counted
  manager.containerStartRequests.clear();

  PhysicalPlan physicalPlan = manager.getPhysicalPlan();
  List<PTContainer> planContainers = physicalPlan.getContainers();
  Assert.assertEquals("" + planContainers, 2, physicalPlan.getContainers().size());

  PTContainer firstContainer = planContainers.get(0);
  Assert.assertEquals(
      "c1.operators " + firstContainer.getOperators(),
      2,
      firstContainer.getOperators().size());

  PTContainer secondContainer = planContainers.get(1);
  Assert.assertEquals(
      "c2.operators " + secondContainer.getOperators(),
      1,
      secondContainer.getOperators().size());

  assignContainer(manager, "container1");
  assignContainer(manager, "container2");

  StreamingContainerAgent firstAgent =
      manager.getContainerAgent(firstContainer.getExternalId());
  StreamingContainerAgent secondAgent =
      manager.getContainerAgent(secondContainer.getExternalId());

  // state before the restart
  Assert.assertEquals(
      "", 0, countState(firstAgent.container, PTOperator.State.PENDING_UNDEPLOY));
  Assert.assertEquals("", 2, countState(firstAgent.container, PTOperator.State.PENDING_DEPLOY));

  manager.scheduleContainerRestart(firstContainer.getExternalId());

  // state after the restart: the restarted container is unchanged ...
  Assert.assertEquals(
      "", 0, countState(firstAgent.container, PTOperator.State.PENDING_UNDEPLOY));
  Assert.assertEquals("", 2, countState(firstAgent.container, PTOperator.State.PENDING_DEPLOY));

  // ... one start request is queued ...
  Assert.assertEquals(
      "" + manager.containerStartRequests, 1, manager.containerStartRequests.size());
  ContainerStartRequest queuedRequest = manager.containerStartRequests.peek();
  Assert.assertNotNull(queuedRequest);

  // ... and the operator in the other container is marked for undeploy
  Assert.assertEquals(
      "" + secondAgent.container,
      1,
      countState(secondAgent.container, PTOperator.State.PENDING_UNDEPLOY));
  Assert.assertEquals(
      "" + secondAgent.container,
      0,
      countState(secondAgent.container, PTOperator.State.PENDING_DEPLOY));
}
/**
 * Verifies that {@code AppDataPushAgent} hands its data to a custom metrics transport configured
 * through {@code LogicalPlan.METRICS_TRANSPORT}.
 *
 * <p>After one call to {@code pushData()} the {@code TestMetricTransport} must have recorded at
 * least one message, and the first recorded message must start with {@code "xyz:"}.
 *
 * @throws Exception if the agent cannot be initialized or fails to push
 */
@Test
public void testCustomMetricsTransport() throws Exception {
  TestGeneratorInputOperator o1 = dag.addOperator("o1", TestGeneratorInputOperator.class);
  GenericTestOperator o2 = dag.addOperator("o2", GenericTestOperator.class);
  dag.addStream("o1.outport", o1.outport, o2.inport1);
  dag.setAttribute(LogicalPlan.METRICS_TRANSPORT, new TestMetricTransport("xyz"));

  StramLocalCluster lc = new StramLocalCluster(dag);
  StreamingContainerManager dnmgr = lc.dnmgr;
  StramAppContext appContext = new StramTestSupport.TestAppContext(dag.getAttributes());

  AppDataPushAgent pushAgent = new AppDataPushAgent(dnmgr, appContext);
  pushAgent.init();
  try {
    pushAgent.pushData();
    Assert.assertTrue(TestMetricTransport.messages.size() > 0);
  } finally {
    // always release the agent, even when the assertion above fails
    pushAgent.close();
  }

  String msg = TestMetricTransport.messages.get(0);
  Assert.assertTrue(msg.startsWith("xyz:"));
}
@Test public void testLatency() throws Exception { TestGeneratorInputOperator o1 = dag.addOperator("o1", TestGeneratorInputOperator.class); GenericTestOperator o2 = dag.addOperator("o2", GenericTestOperator.class); HighLatencyTestOperator o3 = dag.addOperator("o3", HighLatencyTestOperator.class); GenericTestOperator o4 = dag.addOperator("o4", GenericTestOperator.class); long latency = 5000; // 5 seconds o3.setLatency(latency); dag.addStream("o1.outport", o1.outport, o2.inport1, o3.inport1); dag.addStream("o2.outport1", o2.outport1, o4.inport1); dag.addStream("o3.outport1", o3.outport1, o4.inport2); dag.setAttribute(Context.DAGContext.STATS_MAX_ALLOWABLE_WINDOWS_LAG, 2); // 1 second StramLocalCluster lc = new StramLocalCluster(dag); StreamingContainerManager dnmgr = lc.dnmgr; lc.runAsync(); Thread.sleep(10000); LogicalOperatorInfo o1Info = dnmgr.getLogicalOperatorInfo("o1"); LogicalOperatorInfo o2Info = dnmgr.getLogicalOperatorInfo("o2"); LogicalOperatorInfo o3Info = dnmgr.getLogicalOperatorInfo("o3"); LogicalOperatorInfo o4Info = dnmgr.getLogicalOperatorInfo("o4"); Assert.assertEquals("Input operator latency must be zero", 0, o1Info.latencyMA); Assert.assertTrue("Latency must be greater than or equal to zero", o2Info.latencyMA >= 0); Assert.assertTrue( "Actual latency must be greater than the artificially introduced latency", o3Info.latencyMA > latency); Assert.assertTrue("Latency must be greater than or equal to zero", o4Info.latencyMA >= 0); StreamingContainerManager.CriticalPathInfo criticalPathInfo = dnmgr.getCriticalPathInfo(); Assert.assertArrayEquals( "Critical Path must be the path in the DAG that includes the HighLatencyTestOperator", new Integer[] { o1Info.partitions.iterator().next(), o3Info.partitions.iterator().next(), o4Info.partitions.iterator().next() }, criticalPathInfo.path.toArray()); Assert.assertTrue( "Whole DAG latency must be greater than the artificially introduced latency", criticalPathInfo.latency > latency); lc.shutdown(); }
/**
 * Checks that restarting one container marks the operators of the other container for undeploy
 * when both containers feed into the same downstream operator.
 *
 * <p>{@code o1} and {@code o2} both stream into {@code o3}; with at most two containers,
 * {@code o1} and {@code o3} share the first container. After a restart of the second container
 * is scheduled, every operator in the first container must be {@code PENDING_UNDEPLOY}.
 *
 * @throws Exception if the container manager cannot be set up
 */
@Test
public void testRecoveryUpstreamInline() throws Exception {
  GenericTestOperator upstreamA = dag.addOperator("o1", GenericTestOperator.class);
  GenericTestOperator upstreamB = dag.addOperator("o2", GenericTestOperator.class);
  GenericTestOperator merger = dag.addOperator("o3", GenericTestOperator.class);
  dag.addStream("o1o3", upstreamA.outport1, merger.inport1);
  dag.addStream("o2o3", upstreamB.outport1, merger.inport2);
  dag.getAttributes().put(LogicalPlan.CONTAINERS_MAX_COUNT, 2);
  dag.setAttribute(OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());

  StreamingContainerManager manager = new StreamingContainerManager(dag);
  PhysicalPlan physicalPlan = manager.getPhysicalPlan();
  Assert.assertEquals(2, physicalPlan.getContainers().size());
  // lookup result is not used; the call only fails if o1 has no physical instance
  physicalPlan.getOperators(dag.getMeta(upstreamA)).get(0);
  Assert.assertEquals(2, physicalPlan.getContainers().size());

  PTContainer sharedContainer = physicalPlan.getContainers().get(0);
  PTOperator upstreamAInstance = physicalPlan.getOperators(dag.getMeta(upstreamA)).get(0);
  PTOperator mergerInstance = physicalPlan.getOperators(dag.getMeta(merger)).get(0);
  Assert.assertEquals(
      Sets.newHashSet(upstreamAInstance, mergerInstance),
      Sets.newHashSet(sharedContainer.getOperators()));

  PTContainer otherContainer = physicalPlan.getContainers().get(1);
  assignContainer(manager, "c1");
  assignContainer(manager, "c2");

  for (PTOperator deployed : sharedContainer.getOperators()) {
    Assert.assertEquals(
        "state " + deployed, PTOperator.State.PENDING_DEPLOY, deployed.getState());
  }

  manager.scheduleContainerRestart(otherContainer.getExternalId());

  for (PTOperator affected : sharedContainer.getOperators()) {
    Assert.assertEquals(
        "state " + affected, PTOperator.State.PENDING_UNDEPLOY, affected.getState());
  }
}
private void testRestartApp(StorageAgent agent, String appPath1) throws Exception { FileUtils.deleteDirectory(new File(testMeta.dir)); // clean any state from previous run String appId1 = "app1"; String appId2 = "app2"; String appPath2 = testMeta.dir + "/" + appId2; LogicalPlan dag = new LogicalPlan(); dag.setAttribute(LogicalPlan.APPLICATION_ID, appId1); dag.setAttribute(LogicalPlan.APPLICATION_PATH, appPath1); dag.setAttribute(LogicalPlan.APPLICATION_ATTEMPT_ID, 1); dag.setAttribute(OperatorContext.STORAGE_AGENT, agent); dag.addOperator("o1", StatsListeningOperator.class); FSRecoveryHandler recoveryHandler = new FSRecoveryHandler(dag.assertAppPath(), new Configuration(false)); StreamingContainerManager.getInstance(recoveryHandler, dag, false); // test restore initial snapshot + log dag = new LogicalPlan(); dag.setAttribute(LogicalPlan.APPLICATION_PATH, appPath1); StreamingContainerManager scm = StreamingContainerManager.getInstance( new FSRecoveryHandler(dag.assertAppPath(), new Configuration(false)), dag, false); PhysicalPlan plan = scm.getPhysicalPlan(); dag = plan.getLogicalPlan(); // original plan Assert.assertNotNull("operator", dag.getOperatorMeta("o1")); PTOperator o1p1 = plan.getOperators(dag.getOperatorMeta("o1")).get(0); long[] ids = new FSStorageAgent(appPath1 + "/" + LogicalPlan.SUBDIR_CHECKPOINTS, new Configuration()) .getWindowIds(o1p1.getId()); Assert.assertArrayEquals(new long[] {o1p1.getRecoveryCheckpoint().getWindowId()}, ids); Assert.assertNull(o1p1.getContainer().getExternalId()); // trigger journal write o1p1.getContainer().setExternalId("cid1"); scm.writeJournal(o1p1.getContainer().getSetContainerState()); dag = new LogicalPlan(); dag.setAttribute(LogicalPlan.APPLICATION_PATH, appPath2); dag.setAttribute(LogicalPlan.APPLICATION_ID, appId2); StramClient sc = new StramClient(new Configuration(), dag); try { sc.start(); sc.copyInitialState(new Path(appPath1)); } finally { sc.stop(); } scm = StreamingContainerManager.getInstance( new 
FSRecoveryHandler(dag.assertAppPath(), new Configuration(false)), dag, false); plan = scm.getPhysicalPlan(); dag = plan.getLogicalPlan(); assertEquals("modified appId", appId2, dag.getValue(LogicalPlan.APPLICATION_ID)); assertEquals("modified appPath", appPath2, dag.getValue(LogicalPlan.APPLICATION_PATH)); Assert.assertNotNull("operator", dag.getOperatorMeta("o1")); o1p1 = plan.getOperators(dag.getOperatorMeta("o1")).get(0); assertEquals("journal copied", "cid1", o1p1.getContainer().getExternalId()); ids = new FSStorageAgent(appPath2 + "/" + LogicalPlan.SUBDIR_CHECKPOINTS, new Configuration()) .getWindowIds(o1p1.getId()); Assert.assertArrayEquals( "checkpoints copied", new long[] {o1p1.getRecoveryCheckpoint().getWindowId()}, ids); }
/**
 * Exercises the container manager's journal: state changes are written to the journal's output
 * stream, and replaying the recorded bytes restores operator state, container state and
 * operator properties.
 *
 * <p>The journal writes into an in-memory stream whose {@code flush()} and {@code close()} calls
 * are counted, so the test can assert exactly when the journal flushes and when it closes the
 * stream. The flush-count assertions depend on the exact order of the statements below.
 *
 * @throws Exception if writing to or replaying the journal fails
 */
@Test
public void testWriteAheadLog() throws Exception {
  // counters updated by the instrumented output stream below
  final MutableInt flushCount = new MutableInt();
  final MutableBoolean isClosed = new MutableBoolean(false);
  LogicalPlan dag = new LogicalPlan();
  dag.setAttribute(LogicalPlan.APPLICATION_PATH, testMeta.dir);
  dag.setAttribute(OperatorContext.STORAGE_AGENT, new FSStorageAgent(testMeta.dir, null));
  TestGeneratorInputOperator o1 = dag.addOperator("o1", TestGeneratorInputOperator.class);
  StreamingContainerManager scm = new StreamingContainerManager(dag);
  PhysicalPlan plan = scm.getPhysicalPlan();
  Journal j = scm.getJournal();
  // in-memory journal target that records every flush and the close
  ByteArrayOutputStream bos =
      new ByteArrayOutputStream() {
        @Override
        public void flush() throws IOException {
          super.flush();
          flushCount.increment();
        }

        @Override
        public void close() throws IOException {
          super.close();
          isClosed.setValue(true);
        }
      };
  j.setOutputStream(new DataOutputStream(bos));
  PTOperator o1p1 = plan.getOperators(dag.getMeta(o1)).get(0);
  assertEquals(PTOperator.State.PENDING_DEPLOY, o1p1.getState());
  // constructing the mock container results in the first flush (asserted below)
  String externalId = new MockContainer(scm, o1p1.getContainer()).container.getExternalId();
  assertEquals("flush count", 1, flushCount.intValue());
  // an operator state change is journaled: one more flush
  o1p1.setState(PTOperator.State.ACTIVE);
  assertEquals(PTOperator.State.ACTIVE, o1p1.getState());
  assertEquals("flush count", 2, flushCount.intValue());
  assertEquals("is closed", false, isClosed.booleanValue());
  // this will close the stream. There are 2 calls to flush() during the close() - one in Kryo
  // Output and one in FilterOutputStream
  j.setOutputStream(null);
  assertEquals("flush count", 4, flushCount.intValue());
  assertEquals("is closed", true, isClosed.booleanValue());
  // output stream is closed, so state will be changed without recording it in the journal
  o1p1.setState(PTOperator.State.INACTIVE);
  assertEquals(PTOperator.State.INACTIVE, o1p1.getState());
  assertEquals("flush count", 4, flushCount.intValue());
  // replay brings back the last journaled state (ACTIVE), not the unrecorded INACTIVE
  ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
  j.replay(new DataInputStream(bis));
  assertEquals(PTOperator.State.ACTIVE, o1p1.getState());
  // container state: populate, journal, wipe, then replay and compare
  InetSocketAddress addr1 = InetSocketAddress.createUnresolved("host1", 1);
  PTContainer c1 = plan.getContainers().get(0);
  c1.setState(PTContainer.State.ALLOCATED);
  c1.host = "host1";
  c1.bufferServerAddress = addr1;
  c1.setAllocatedMemoryMB(2);
  c1.setRequiredMemoryMB(1);
  c1.setAllocatedVCores(3);
  c1.setRequiredVCores(4);
  j.setOutputStream(new DataOutputStream(bos));
  j.write(c1.getSetContainerState());
  c1.setExternalId(null);
  c1.setState(PTContainer.State.NEW);
  c1.setExternalId(null);
  c1.host = null;
  c1.bufferServerAddress = null;
  bis = new ByteArrayInputStream(bos.toByteArray());
  j.replay(new DataInputStream(bis));
  assertEquals(externalId, c1.getExternalId());
  assertEquals(PTContainer.State.ALLOCATED, c1.getState());
  assertEquals("host1", c1.host);
  assertEquals(addr1, c1.bufferServerAddress);
  assertEquals(1, c1.getRequiredMemoryMB());
  assertEquals(2, c1.getAllocatedMemoryMB());
  assertEquals(3, c1.getAllocatedVCores());
  assertEquals(4, c1.getRequiredVCores());
  // operator property written directly to the journal: replay overrides the local value 10
  j.write(scm.getSetOperatorProperty("o1", "maxTuples", "100"));
  o1.setMaxTuples(10);
  j.setOutputStream(null);
  bis = new ByteArrayInputStream(bos.toByteArray());
  j.replay(new DataInputStream(bis));
  assertEquals(100, o1.getMaxTuples());
  // operator property set through the container manager: applied immediately and restored by
  // replay after the operator was changed behind the journal's back
  j.setOutputStream(new DataOutputStream(bos));
  scm.setOperatorProperty("o1", "maxTuples", "10");
  assertEquals(10, o1.getMaxTuples());
  o1.setMaxTuples(100);
  assertEquals(100, o1.getMaxTuples());
  j.setOutputStream(null);
  bis = new ByteArrayInputStream(bos.toByteArray());
  j.replay(new DataInputStream(bis));
  assertEquals(10, o1.getMaxTuples());
  // setting a physical operator property with the journal attached; no assertion follows, the
  // call only has to complete without throwing
  j.setOutputStream(new DataOutputStream(bos));
  scm.setPhysicalOperatorProperty(o1p1.getId(), "maxTuples", "50");
}
/**
 * Test serialization of the container manager with mock execution layer.
 *
 * <p>The container manager is created through {@code StreamingContainerManager.getInstance} with
 * an {@code FSRecoveryHandler} and then restored twice from the same application path: once
 * right after the initial snapshot, and once after a container restart, a heartbeat, a logical
 * plan modification, an operator state change and an additional checkpoint. After each restore
 * the recovered logical and physical plan are compared against the expected state.
 *
 * @param agent storage agent set as {@code STORAGE_AGENT} on the DAG
 * @throws Exception if any file system, recovery or plan modification step fails
 */
private void testContainerManager(StorageAgent agent) throws Exception {
  FileUtils.deleteDirectory(new File(testMeta.dir)); // clean any state from previous run

  LogicalPlan dag = new LogicalPlan();
  dag.setAttribute(LogicalPlan.APPLICATION_PATH, testMeta.dir);
  dag.setAttribute(OperatorContext.STORAGE_AGENT, agent);

  StatsListeningOperator o1 = dag.addOperator("o1", StatsListeningOperator.class);

  FSRecoveryHandler recoveryHandler =
      new FSRecoveryHandler(dag.assertAppPath(), new Configuration(false));
  StreamingContainerManager scm =
      StreamingContainerManager.getInstance(recoveryHandler, dag, false);
  // creating the manager must have produced the snapshot file
  File expFile = new File(recoveryHandler.getDir(), FSRecoveryHandler.FILE_SNAPSHOT);
  Assert.assertTrue("snapshot file " + expFile, expFile.exists());

  PhysicalPlan plan = scm.getPhysicalPlan();
  assertEquals("number required containers", 1, plan.getContainers().size());

  PTOperator o1p1 = plan.getOperators(dag.getMeta(o1)).get(0);

  @SuppressWarnings(
      "UnusedAssignment") /* sneaky: the constructor does some changes to the container */
  MockContainer mc = new MockContainer(scm, o1p1.getContainer());
  PTContainer originalContainer = o1p1.getContainer();

  Assert.assertNotNull(o1p1.getContainer().bufferServerAddress);
  assertEquals(PTContainer.State.ACTIVE, o1p1.getContainer().getState());
  assertEquals("state " + o1p1, PTOperator.State.PENDING_DEPLOY, o1p1.getState());

  // test restore initial snapshot + log
  dag = new LogicalPlan();
  dag.setAttribute(LogicalPlan.APPLICATION_PATH, testMeta.dir);
  scm =
      StreamingContainerManager.getInstance(
          new FSRecoveryHandler(dag.assertAppPath(), new Configuration(false)), dag, false);
  // from here on work with the plans held by the restored manager
  dag = scm.getLogicalPlan();
  plan = scm.getPhysicalPlan();

  o1p1 = plan.getOperators(dag.getOperatorMeta("o1")).get(0);
  assertEquals("post restore state " + o1p1, PTOperator.State.PENDING_DEPLOY, o1p1.getState());
  o1 = (StatsListeningOperator) o1p1.getOperatorMeta().getOperator();
  assertEquals(
      "containerId", originalContainer.getExternalId(), o1p1.getContainer().getExternalId());
  assertEquals("stats listener", 1, o1p1.statsListeners.size());
  assertEquals("number stats calls", 0, o1.processStatsCnt); // stats are not logged
  // the container that was ACTIVE before the restore comes back as ALLOCATED
  assertEquals("post restore 1", PTContainer.State.ALLOCATED, o1p1.getContainer().getState());
  assertEquals(
      "post restore 1",
      originalContainer.bufferServerAddress,
      o1p1.getContainer().bufferServerAddress);

  StreamingContainerAgent sca = scm.getContainerAgent(originalContainer.getExternalId());
  Assert.assertNotNull("allocated container restored " + originalContainer, sca);
  assertEquals(
      "memory usage allocated container",
      (int) OperatorContext.MEMORY_MB.defaultValue,
      sca.container.getAllocatedMemoryMB());

  // YARN-1490 - simulate container terminated on AM recovery
  scm.scheduleContainerRestart(originalContainer.getExternalId());
  assertEquals("memory usage of failed container", 0, sca.container.getAllocatedMemoryMB());

  // record a checkpoint and report the operator as active through a mock heartbeat
  Checkpoint firstCheckpoint = new Checkpoint(3, 0, 0);
  mc = new MockContainer(scm, o1p1.getContainer());
  checkpoint(scm, o1p1, firstCheckpoint);
  mc.stats(o1p1.getId())
      .deployState(OperatorHeartbeat.DeployState.ACTIVE)
      .currentWindowId(3)
      .checkpointWindowId(3);
  mc.sendHeartbeat();
  assertEquals("state " + o1p1, PTOperator.State.ACTIVE, o1p1.getState());

  // logical plan modification triggers snapshot
  CreateOperatorRequest cor = new CreateOperatorRequest();
  cor.setOperatorFQCN(GenericTestOperator.class.getName());
  cor.setOperatorName("o2");
  CreateStreamRequest csr = new CreateStreamRequest();
  csr.setSourceOperatorName("o1");
  csr.setSourceOperatorPortName("outport");
  csr.setSinkOperatorName("o2");
  csr.setSinkOperatorPortName("inport1");
  FutureTask<?> lpmf = scm.logicalPlanModification(Lists.newArrayList(cor, csr));
  // keep calling monitorHeartbeat() until the modification task reports completion
  while (!lpmf.isDone()) {
    scm.monitorHeartbeat();
  }
  Assert.assertNull(lpmf.get()); // unmask exception, if any

  Assert.assertSame("dag references", dag, scm.getLogicalPlan());
  assertEquals("number operators after plan modification", 2, dag.getAllOperators().size());

  // set operator state triggers journal write
  o1p1.setState(PTOperator.State.INACTIVE);

  Checkpoint offlineCheckpoint = new Checkpoint(10, 0, 0);
  // write checkpoint while AM is out,
  // it needs to be picked up as part of restore
  checkpoint(scm, o1p1, offlineCheckpoint);

  // test restore
  dag = new LogicalPlan();
  dag.setAttribute(LogicalPlan.APPLICATION_PATH, testMeta.dir);
  scm =
      StreamingContainerManager.getInstance(
          new FSRecoveryHandler(dag.assertAppPath(), new Configuration(false)), dag, false);

  // the restored manager holds its own logical plan, not the one passed in
  Assert.assertNotSame("dag references", dag, scm.getLogicalPlan());
  assertEquals(
      "number operators after restore", 2, scm.getLogicalPlan().getAllOperators().size());

  dag = scm.getLogicalPlan();
  plan = scm.getPhysicalPlan();

  o1p1 = plan.getOperators(dag.getOperatorMeta("o1")).get(0);
  assertEquals("post restore state " + o1p1, PTOperator.State.INACTIVE, o1p1.getState());
  o1 = (StatsListeningOperator) o1p1.getOperatorMeta().getOperator();
  assertEquals("stats listener", 1, o1p1.statsListeners.size());
  assertEquals("number stats calls post restore", 1, o1.processStatsCnt);
  assertEquals("post restore 1", PTContainer.State.ACTIVE, o1p1.getContainer().getState());
  assertEquals(
      "post restore 1",
      originalContainer.bufferServerAddress,
      o1p1.getContainer().bufferServerAddress);

  // offline checkpoint detection
  assertEquals(
      "checkpoints after recovery",
      Lists.newArrayList(firstCheckpoint, offlineCheckpoint),
      o1p1.checkpoints);
}
/** * Launch application for the dag represented by this client. * * @throws YarnException * @throws IOException */ public void startApplication() throws YarnException, IOException { Class<?>[] defaultClasses; if (applicationType.equals(YARN_APPLICATION_TYPE)) { // TODO restrict the security check to only check if security is enabled for webservices. if (UserGroupInformation.isSecurityEnabled()) { defaultClasses = DATATORRENT_SECURITY_CLASSES; } else { defaultClasses = DATATORRENT_CLASSES; } } else { throw new IllegalStateException(applicationType + " is not a valid application type."); } LinkedHashSet<String> localJarFiles = findJars(dag, defaultClasses); if (resources != null) { localJarFiles.addAll(resources); } YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info( "Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); // GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class); // GetClusterNodesResponse clusterNodesResp = // rmClient.clientRM.getClusterNodes(clusterNodesReq); // LOG.info("Got Cluster node info from ASM"); // for (NodeReport node : clusterNodesResp.getNodeReports()) { // LOG.info("Got node report from ASM for" // + ", nodeId=" + node.getNodeId() // + ", nodeAddress" + node.getHttpAddress() // + ", nodeRackName" + node.getRackName() // + ", nodeNumContainers" + node.getNumContainers() // + ", nodeHealthStatus" + node.getHealthReport()); // } List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info( "User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } // Get a new application id YarnClientApplication newApp = yarnClient.createApplication(); appId = newApp.getNewApplicationResponse().getApplicationId(); // Dump out information about cluster capability as seen by the resource 
manager int maxMem = newApp.getNewApplicationResponse().getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); int amMemory = dag.getMasterMemoryMB(); if (amMemory > maxMem) { LOG.info( "AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } if (dag.getAttributes().get(LogicalPlan.APPLICATION_ID) == null) { dag.setAttribute(LogicalPlan.APPLICATION_ID, appId.toString()); } // Create launch context for app master LOG.info("Setting up application submission context for ASM"); ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class); // set the application id appContext.setApplicationId(appId); // set the application name appContext.setApplicationName(dag.getValue(LogicalPlan.APPLICATION_NAME)); appContext.setApplicationType(this.applicationType); if (YARN_APPLICATION_TYPE.equals(this.applicationType)) { // appContext.setMaxAppAttempts(1); // no retries until Stram is HA } // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); // Setup security tokens // If security is enabled get ResourceManager and NameNode delegation tokens. // Set these tokens on the container so that they are sent as part of application submission. // This also sets them up for renewal by ResourceManager. The NameNode delegation rmToken // is also used by ResourceManager to fetch the jars from HDFS and set them up for the // application master launch. if (UserGroupInformation.isSecurityEnabled()) { Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. 
FileSystem fs = StramClientUtils.newFileSystemInstance(conf); try { final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } } finally { fs.close(); } addRMDelegationToken(tokenRenewer, credentials); DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); // copy required jar files to dfs, to be localized for containers FileSystem fs = StramClientUtils.newFileSystemInstance(conf); try { Path appsBasePath = new Path(StramClientUtils.getDTDFSRootDir(fs, conf), StramClientUtils.SUBDIR_APPS); Path appPath = new Path(appsBasePath, appId.toString()); String libJarsCsv = copyFromLocal(fs, appPath, localJarFiles.toArray(new String[] {})); LOG.info("libjars: {}", libJarsCsv); dag.getAttributes().put(LogicalPlan.LIBRARY_JARS, libJarsCsv); LaunchContainerRunnable.addFilesToLocalResources( LocalResourceType.FILE, libJarsCsv, localResources, fs); if (archives != null) { String[] localFiles = archives.split(","); String archivesCsv = copyFromLocal(fs, appPath, localFiles); LOG.info("archives: {}", archivesCsv); dag.getAttributes().put(LogicalPlan.ARCHIVES, archivesCsv); LaunchContainerRunnable.addFilesToLocalResources( LocalResourceType.ARCHIVE, archivesCsv, localResources, fs); } if (files != null) { String[] localFiles = files.split(","); String filesCsv = copyFromLocal(fs, appPath, localFiles); LOG.info("files: {}", filesCsv); dag.getAttributes().put(LogicalPlan.FILES, filesCsv); LaunchContainerRunnable.addFilesToLocalResources( 
LocalResourceType.FILE, filesCsv, localResources, fs); } dag.getAttributes().put(LogicalPlan.APPLICATION_PATH, appPath.toString()); if (dag.getAttributes().get(OperatorContext.STORAGE_AGENT) == null) { /* which would be the most likely case */ Path checkpointPath = new Path(appPath, LogicalPlan.SUBDIR_CHECKPOINTS); // use conf client side to pickup any proxy settings from dt-site.xml dag.setAttribute( OperatorContext.STORAGE_AGENT, new FSStorageAgent(checkpointPath.toString(), conf)); } if (dag.getAttributes().get(LogicalPlan.CONTAINER_OPTS_CONFIGURATOR) == null) { dag.setAttribute( LogicalPlan.CONTAINER_OPTS_CONFIGURATOR, new BasicContainerOptConfigurator()); } // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { Path log4jSrc = new Path(log4jPropFile); Path log4jDst = new Path(appPath, "log4j.props"); fs.copyFromLocalFile(false, true, log4jSrc, log4jDst); FileStatus log4jFileStatus = fs.getFileStatus(log4jDst); LocalResource log4jRsrc = Records.newRecord(LocalResource.class); log4jRsrc.setType(LocalResourceType.FILE); log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION); log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri())); log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime()); log4jRsrc.setSize(log4jFileStatus.getLen()); localResources.put("log4j.properties", log4jRsrc); } if (originalAppId != null) { Path origAppPath = new Path(appsBasePath, this.originalAppId); LOG.info("Restart from {}", origAppPath); copyInitialState(origAppPath); } // push logical plan to DFS location Path cfgDst = new Path(appPath, LogicalPlan.SER_FILE_NAME); FSDataOutputStream outStream = fs.create(cfgDst, true); LogicalPlan.write(this.dag, outStream); outStream.close(); Path launchConfigDst = new Path(appPath, LogicalPlan.LAUNCH_CONFIG_FILE_NAME); outStream = fs.create(launchConfigDst, true); conf.writeXml(outStream); outStream.close(); FileStatus topologyFileStatus = fs.getFileStatus(cfgDst); LocalResource topologyRsrc = 
Records.newRecord(LocalResource.class); topologyRsrc.setType(LocalResourceType.FILE); topologyRsrc.setVisibility(LocalResourceVisibility.APPLICATION); topologyRsrc.setResource(ConverterUtils.getYarnUrlFromURI(cfgDst.toUri())); topologyRsrc.setTimestamp(topologyFileStatus.getModificationTime()); topologyRsrc.setSize(topologyFileStatus.getLen()); localResources.put(LogicalPlan.SER_FILE_NAME, topologyRsrc); // Set local resource info into app master container launch context amContainer.setLocalResources(localResources); // Set the necessary security tokens as needed // amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); // Add application jar(s) location to classpath // At some point we should not be required to add // the hadoop specific classpaths to the env. // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." for the application jar(s) // including ${CLASSPATH} will duplicate the class path in app master, removing it for now // StringBuilder classPathEnv = new StringBuilder("${CLASSPATH}:./*"); StringBuilder classPathEnv = new StringBuilder("./*"); String classpath = conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH); for (String c : StringUtils.isBlank(classpath) ? 
YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH : classpath.split(",")) { if (c.equals("$HADOOP_CLIENT_CONF_DIR")) { // SPOI-2501 continue; } classPathEnv.append(':'); classPathEnv.append(c.trim()); } env.put("CLASSPATH", classPathEnv.toString()); // propagate to replace node managers user name (effective in non-secure mode) env.put("HADOOP_USER_NAME", UserGroupInformation.getLoginUser().getUserName()); amContainer.setEnvironment(env); // Set the necessary command to execute the application master ArrayList<CharSequence> vargs = new ArrayList<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(javaCmd); if (dag.isDebug()) { vargs.add("-agentlib:jdwp=transport=dt_socket,server=y,suspend=n"); } // Set Xmx based on am memory size // default heap size 75% of total memory vargs.add("-Xmx" + (amMemory * 3 / 4) + "m"); vargs.add("-XX:+HeapDumpOnOutOfMemoryError"); vargs.add("-XX:HeapDumpPath=/tmp/dt-heap-" + appId.getId() + ".bin"); vargs.add("-Dhadoop.root.logger=" + (dag.isDebug() ? 
"DEBUG" : "INFO") + ",RFA"); vargs.add("-Dhadoop.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR); vargs.add(String.format("-D%s=%s", StreamingContainer.PROP_APP_PATH, dag.assertAppPath())); if (dag.isDebug()) { vargs.add("-Dlog4j.debug=true"); } String loggersLevel = conf.get(DTLoggerFactory.DT_LOGGERS_LEVEL); if (loggersLevel != null) { vargs.add(String.format("-D%s=%s", DTLoggerFactory.DT_LOGGERS_LEVEL, loggersLevel)); } vargs.add(StreamingAppMaster.class.getName()); vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); // Get final command StringBuilder command = new StringBuilder(9 * vargs.size()); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); amContainer.setCommands(commands); // Set up resource type requirements // For now, only memory is supported so we set memory requirements Resource capability = Records.newRecord(Resource.class); capability.setMemory(amMemory); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); appContext.setAMContainerSpec(amContainer); // Set the priority for the application master Priority pri = Records.newRecord(Priority.class); pri.setPriority(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(queueName); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = rmClient.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure String specStr = Objects.toStringHelper("Submitting 
application: ") .add("name", appContext.getApplicationName()) .add("queue", appContext.getQueue()) .add("user", UserGroupInformation.getLoginUser()) .add("resource", appContext.getResource()) .toString(); LOG.info(specStr); if (dag.isDebug()) { // LOG.info("Full submission context: " + appContext); } yarnClient.submitApplication(appContext); } finally { fs.close(); } }
@Test public void testGenerateDeployInfo() { TestGeneratorInputOperator o1 = dag.addOperator("o1", TestGeneratorInputOperator.class); GenericTestOperator o2 = dag.addOperator("o2", GenericTestOperator.class); GenericTestOperator o3 = dag.addOperator("o3", GenericTestOperator.class); GenericTestOperator o4 = dag.addOperator("o4", GenericTestOperator.class); dag.setOutputPortAttribute(o1.outport, PortContext.BUFFER_MEMORY_MB, 256); dag.addStream("o1.outport", o1.outport, o2.inport1); dag.setOutputPortAttribute(o1.outport, PortContext.SPIN_MILLIS, 99); dag.addStream("o2.outport1", o2.outport1, o3.inport1).setLocality(Locality.CONTAINER_LOCAL); dag.addStream("o3.outport1", o3.outport1, o4.inport1).setLocality(Locality.THREAD_LOCAL); dag.getAttributes().put(LogicalPlan.CONTAINERS_MAX_COUNT, 2); dag.setAttribute(OperatorContext.STORAGE_AGENT, new MemoryStorageAgent()); Assert.assertEquals("number operators", 4, dag.getAllOperators().size()); Assert.assertEquals("number root operators", 1, dag.getRootOperators().size()); StreamingContainerManager dnm = new StreamingContainerManager(dag); Assert.assertEquals("number containers", 2, dnm.getPhysicalPlan().getContainers().size()); dnm.assignContainer( new ContainerResource(0, "container1Id", "host1", 1024, 0, null), InetSocketAddress.createUnresolved("host1", 9001)); dnm.assignContainer( new ContainerResource(0, "container2Id", "host2", 1024, 0, null), InetSocketAddress.createUnresolved("host2", 9002)); StreamingContainerAgent sca1 = dnm.getContainerAgent(dnm.getPhysicalPlan().getContainers().get(0).getExternalId()); StreamingContainerAgent sca2 = dnm.getContainerAgent(dnm.getPhysicalPlan().getContainers().get(1).getExternalId()); Assert.assertEquals("", dnm.getPhysicalPlan().getContainers().get(0), sca1.container); Assert.assertEquals("", PTContainer.State.ALLOCATED, sca1.container.getState()); List<OperatorDeployInfo> c1 = sca1.getDeployInfoList(sca1.container.getOperators()); Assert.assertEquals("number operators assigned 
to c1", 1, c1.size()); OperatorDeployInfo o1DI = getNodeDeployInfo(c1, dag.getMeta(o1)); Assert.assertNotNull(o1 + " assigned to " + sca1.container.getExternalId(), o1DI); Assert.assertEquals("type " + o1DI, OperatorDeployInfo.OperatorType.INPUT, o1DI.type); Assert.assertEquals("inputs " + o1DI.name, 0, o1DI.inputs.size()); Assert.assertEquals("outputs " + o1DI.name, 1, o1DI.outputs.size()); Assert.assertNotNull("contextAttributes " + o1DI.name, o1DI.contextAttributes); OutputDeployInfo c1o1outport = o1DI.outputs.get(0); Assert.assertNotNull("stream connection for container1", c1o1outport); Assert.assertEquals( "stream connection for container1", "o1.outport", c1o1outport.declaredStreamId); Assert.assertEquals( "stream connects to upstream host", sca1.container.host, c1o1outport.bufferServerHost); Assert.assertEquals( "stream connects to upstream port", sca1.container.bufferServerAddress.getPort(), c1o1outport.bufferServerPort); Assert.assertNotNull("contextAttributes " + c1o1outport, c1o1outport.contextAttributes); Assert.assertEquals( "contextAttributes " + c1o1outport, Integer.valueOf(99), c1o1outport.contextAttributes.get(PortContext.SPIN_MILLIS)); List<OperatorDeployInfo> c2 = sca2.getDeployInfoList(sca2.container.getOperators()); Assert.assertEquals("number operators assigned to container", 3, c2.size()); OperatorDeployInfo o2DI = getNodeDeployInfo(c2, dag.getMeta(o2)); OperatorDeployInfo o3DI = getNodeDeployInfo(c2, dag.getMeta(o3)); Assert.assertNotNull(dag.getMeta(o2) + " assigned to " + sca2.container.getExternalId(), o2DI); Assert.assertNotNull(dag.getMeta(o3) + " assigned to " + sca2.container.getExternalId(), o3DI); Assert.assertTrue( "The buffer server memory for container 1", 256 == sca1.getInitContext().getValue(ContainerContext.BUFFER_SERVER_MB)); Assert.assertTrue( "The buffer server memory for container 2", 0 == sca2.getInitContext().getValue(ContainerContext.BUFFER_SERVER_MB)); // buffer server input o2 from o1 InputDeployInfo c2o2i1 = 
getInputDeployInfo(o2DI, "o1.outport"); Assert.assertNotNull("stream connection for container2", c2o2i1); Assert.assertEquals( "stream connects to upstream host", sca1.container.host, c2o2i1.bufferServerHost); Assert.assertEquals( "stream connects to upstream port", sca1.container.bufferServerAddress.getPort(), c2o2i1.bufferServerPort); Assert.assertEquals( "portName " + c2o2i1, dag.getMeta(o2).getMeta(o2.inport1).getPortName(), c2o2i1.portName); Assert.assertNull("partitionKeys " + c2o2i1, c2o2i1.partitionKeys); Assert.assertEquals("sourceNodeId " + c2o2i1, o1DI.id, c2o2i1.sourceNodeId); Assert.assertEquals( "sourcePortName " + c2o2i1, TestGeneratorInputOperator.OUTPUT_PORT, c2o2i1.sourcePortName); Assert.assertNotNull("contextAttributes " + c2o2i1, c2o2i1.contextAttributes); // inline input o3 from o2 InputDeployInfo c2o3i1 = getInputDeployInfo(o3DI, "o2.outport1"); Assert.assertNotNull("input from o2.outport1", c2o3i1); Assert.assertEquals("portName " + c2o3i1, GenericTestOperator.IPORT1, c2o3i1.portName); Assert.assertNotNull("stream connection for container2", c2o3i1); Assert.assertNull("bufferServerHost " + c2o3i1, c2o3i1.bufferServerHost); Assert.assertEquals("bufferServerPort " + c2o3i1, 0, c2o3i1.bufferServerPort); Assert.assertNull("partitionKeys " + c2o3i1, c2o3i1.partitionKeys); Assert.assertEquals("sourceNodeId " + c2o3i1, o2DI.id, c2o3i1.sourceNodeId); Assert.assertEquals( "sourcePortName " + c2o3i1, GenericTestOperator.OPORT1, c2o3i1.sourcePortName); Assert.assertEquals("locality " + c2o3i1, Locality.CONTAINER_LOCAL, c2o3i1.locality); // THREAD_LOCAL o4.inport1 OperatorDeployInfo o4DI = getNodeDeployInfo(c2, dag.getMeta(o4)); Assert.assertNotNull(dag.getMeta(o4) + " assigned to " + sca2.container.getExternalId(), o4DI); InputDeployInfo c2o4i1 = getInputDeployInfo(o4DI, "o3.outport1"); Assert.assertNotNull("input from o3.outport1", c2o4i1); Assert.assertEquals("portName " + c2o4i1, GenericTestOperator.IPORT1, c2o4i1.portName); 
Assert.assertNotNull("stream connection for container2", c2o4i1); Assert.assertNull("bufferServerHost " + c2o4i1, c2o4i1.bufferServerHost); Assert.assertEquals("bufferServerPort " + c2o4i1, 0, c2o4i1.bufferServerPort); Assert.assertNull("partitionKeys " + c2o4i1, c2o4i1.partitionKeys); Assert.assertEquals("sourceNodeId " + c2o4i1, o3DI.id, c2o4i1.sourceNodeId); Assert.assertEquals( "sourcePortName " + c2o4i1, GenericTestOperator.OPORT1, c2o4i1.sourcePortName); Assert.assertEquals("locality " + c2o4i1, Locality.THREAD_LOCAL, c2o4i1.locality); }
@Test public void testStaticPartitioning() { // // ,---> node2----, // | | // node1---+---> node2----+--->unifier--->node3 // | | // '---> node2----' // GenericTestOperator node1 = dag.addOperator("node1", GenericTestOperator.class); PhysicalPlanTest.PartitioningTestOperator node2 = dag.addOperator("node2", PhysicalPlanTest.PartitioningTestOperator.class); node2.setPartitionCount(3); dag.setAttribute( node2, OperatorContext.SPIN_MILLIS, 10); /* this should not affect anything materially */ dag.setOutputPortAttribute(node2.outport1, PortContext.QUEUE_CAPACITY, 1111); GenericTestOperator node3 = dag.addOperator("node3", GenericTestOperator.class); dag.setInputPortAttribute(node3.inport1, PortContext.QUEUE_CAPACITY, 2222); LogicalPlan.StreamMeta n1n2 = dag.addStream("n1n2", node1.outport1, node2.inport1); LogicalPlan.StreamMeta n2n3 = dag.addStream("n2n3", node2.outport1, node3.inport1); dag.setAttribute(LogicalPlan.CONTAINERS_MAX_COUNT, Integer.MAX_VALUE); MemoryStorageAgent msa = new MemoryStorageAgent(); dag.setAttribute(OperatorContext.STORAGE_AGENT, msa); StreamingContainerManager dnm = new StreamingContainerManager(dag); PhysicalPlan plan = dnm.getPhysicalPlan(); Assert.assertEquals("number containers", 6, plan.getContainers().size()); List<StreamingContainerAgent> containerAgents = Lists.newArrayList(); for (int i = 0; i < plan.getContainers().size(); i++) { containerAgents.add(assignContainer(dnm, "container" + (i + 1))); } PTContainer c = plan.getOperators(dag.getMeta(node1)).get(0).getContainer(); StreamingContainerAgent sca1 = dnm.getContainerAgent(c.getExternalId()); List<OperatorDeployInfo> c1 = getDeployInfo(sca1); Assert.assertEquals("number operators assigned to container", 1, c1.size()); Assert.assertTrue( dag.getMeta(node2) + " assigned to " + sca1.container.getExternalId(), containsNodeContext(c1, dag.getMeta(node1))); List<PTOperator> o2Partitions = plan.getOperators(dag.getMeta(node2)); Assert.assertEquals( "number partitions", 
TestStaticPartitioningSerDe.partitions.length, o2Partitions.size()); for (int i = 0; i < o2Partitions.size(); i++) { String containerId = o2Partitions.get(i).getContainer().getExternalId(); List<OperatorDeployInfo> cc = getDeployInfo(dnm.getContainerAgent(containerId)); Assert.assertEquals("number operators assigned to container", 1, cc.size()); Assert.assertTrue( dag.getMeta(node2) + " assigned to " + containerId, containsNodeContext(cc, dag.getMeta(node2))); // n1n2 in, mergeStream out OperatorDeployInfo ndi = cc.get(0); Assert.assertEquals("type " + ndi, OperatorDeployInfo.OperatorType.GENERIC, ndi.type); Assert.assertEquals("inputs " + ndi, 1, ndi.inputs.size()); Assert.assertEquals("outputs " + ndi, 1, ndi.outputs.size()); InputDeployInfo nidi = ndi.inputs.get(0); Assert.assertEquals("stream " + nidi, n1n2.getName(), nidi.declaredStreamId); Assert.assertEquals( "partition for " + containerId, Sets.newHashSet(node2.partitionKeys[i]), nidi.partitionKeys); Assert.assertEquals("number stream codecs for " + nidi, 1, nidi.streamCodecs.size()); } // unifier List<PTOperator> o2Unifiers = plan.getMergeOperators(dag.getMeta(node2)); Assert.assertEquals("number unifiers", 1, o2Unifiers.size()); List<OperatorDeployInfo> cUnifier = getDeployInfo(dnm.getContainerAgent(o2Unifiers.get(0).getContainer().getExternalId())); Assert.assertEquals("number operators " + cUnifier, 1, cUnifier.size()); OperatorDeployInfo mergeNodeDI = getNodeDeployInfo(cUnifier, dag.getMeta(node2).getMeta(node2.outport1).getUnifierMeta()); Assert.assertNotNull("unifier for " + node2, mergeNodeDI); Assert.assertEquals( "type " + mergeNodeDI, OperatorDeployInfo.OperatorType.UNIFIER, mergeNodeDI.type); Assert.assertEquals("inputs " + mergeNodeDI, 3, mergeNodeDI.inputs.size()); List<Integer> sourceNodeIds = Lists.newArrayList(); for (InputDeployInfo nidi : mergeNodeDI.inputs) { Assert.assertEquals("streamName " + nidi, n2n3.getName(), nidi.declaredStreamId); String mergePortName = "<merge#" + 
dag.getMeta(node2).getMeta(node2.outport1).getPortName() + ">"; Assert.assertEquals("portName " + nidi, mergePortName, nidi.portName); Assert.assertNotNull("sourceNodeId " + nidi, nidi.sourceNodeId); Assert.assertNotNull("contextAttributes " + nidi, nidi.contextAttributes); Assert.assertEquals( "contextAttributes ", new Integer(1111), nidi.getValue(PortContext.QUEUE_CAPACITY)); sourceNodeIds.add(nidi.sourceNodeId); } for (PTOperator node : dnm.getPhysicalPlan().getOperators(dag.getMeta(node2))) { Assert.assertTrue( sourceNodeIds + " contains " + node.getId(), sourceNodeIds.contains(node.getId())); } Assert.assertEquals("outputs " + mergeNodeDI, 1, mergeNodeDI.outputs.size()); for (OutputDeployInfo odi : mergeNodeDI.outputs) { Assert.assertNotNull("contextAttributes " + odi, odi.contextAttributes); Assert.assertEquals( "contextAttributes ", new Integer(2222), odi.getValue(PortContext.QUEUE_CAPACITY)); } try { Object operator = msa.load(mergeNodeDI.id, Stateless.WINDOW_ID); Assert.assertTrue("" + operator, operator instanceof DefaultUnifier); } catch (IOException ex) { throw new RuntimeException(ex); } // node3 container c = plan.getOperators(dag.getMeta(node3)).get(0).getContainer(); List<OperatorDeployInfo> cmerge = getDeployInfo(dnm.getContainerAgent(c.getExternalId())); Assert.assertEquals("number operators " + cmerge, 1, cmerge.size()); OperatorDeployInfo node3DI = getNodeDeployInfo(cmerge, dag.getMeta(node3)); Assert.assertNotNull(dag.getMeta(node3) + " assigned", node3DI); Assert.assertEquals("inputs " + node3DI, 1, node3DI.inputs.size()); InputDeployInfo node3In = node3DI.inputs.get(0); Assert.assertEquals("streamName " + node3In, n2n3.getName(), node3In.declaredStreamId); Assert.assertEquals( "portName " + node3In, dag.getMeta(node3).getMeta(node3.inport1).getPortName(), node3In.portName); Assert.assertNotNull("sourceNodeId " + node3DI, node3In.sourceNodeId); Assert.assertEquals( "sourcePortName " + node3DI, mergeNodeDI.outputs.get(0).portName, 
node3In.sourcePortName); }
@Test public void testOperatorShutdown() { dag.setAttribute(OperatorContext.STORAGE_AGENT, new MemoryStorageAgent()); GenericTestOperator o1 = dag.addOperator("o1", GenericTestOperator.class); GenericTestOperator o2 = dag.addOperator("o2", GenericTestOperator.class); GenericTestOperator o3 = dag.addOperator("o3", GenericTestOperator.class); dag.addStream("stream1", o1.outport1, o2.inport1); dag.addStream("stream2", o2.outport1, o3.inport1); dag.setAttribute( o2, OperatorContext.PARTITIONER, new StatelessPartitioner<GenericTestOperator>(2)); StreamingContainerManager scm = new StreamingContainerManager(dag); PhysicalPlan physicalPlan = scm.getPhysicalPlan(); Map<PTContainer, MockContainer> mockContainers = new HashMap<>(); for (PTContainer c : physicalPlan.getContainers()) { MockContainer mc = new MockContainer(scm, c); mockContainers.put(c, mc); } // deploy all containers for (Map.Entry<PTContainer, MockContainer> ce : mockContainers.entrySet()) { ce.getValue().deploy(); } for (Map.Entry<PTContainer, MockContainer> ce : mockContainers.entrySet()) { // skip buffer server purge in monitorHeartbeat ce.getKey().bufferServerAddress = null; } PTOperator o1p1 = physicalPlan.getOperators(dag.getMeta(o1)).get(0); MockContainer mc1 = mockContainers.get(o1p1.getContainer()); MockOperatorStats o1p1mos = mc1.stats(o1p1.getId()); o1p1mos.currentWindowId(1).checkpointWindowId(1).deployState(DeployState.ACTIVE); mc1.sendHeartbeat(); PTOperator o2p1 = physicalPlan.getOperators(dag.getMeta(o2)).get(0); MockContainer mc2 = mockContainers.get(o2p1.getContainer()); MockOperatorStats o2p1mos = mc2.stats(o2p1.getId()); o2p1mos.currentWindowId(1).checkpointWindowId(1).deployState(DeployState.ACTIVE); mc2.sendHeartbeat(); Assert.assertEquals("2 partitions", 2, physicalPlan.getOperators(dag.getMeta(o2)).size()); PTOperator o2p2 = physicalPlan.getOperators(dag.getMeta(o2)).get(1); MockContainer mc3 = mockContainers.get(o2p2.getContainer()); MockOperatorStats o2p2mos = 
mc3.stats(o2p2.getId()); o2p2mos.currentWindowId(1).checkpointWindowId(1).deployState(DeployState.ACTIVE); mc3.sendHeartbeat(); PTOperator o3p1 = physicalPlan.getOperators(dag.getMeta(o3)).get(0); MockContainer mc4 = mockContainers.get(o3p1.getContainer()); MockOperatorStats o3p1mos = mc4.stats(o3p1.getId()); o3p1mos.currentWindowId(1).checkpointWindowId(1).deployState(DeployState.ACTIVE); mc4.sendHeartbeat(); // unifier PTOperator unifier = physicalPlan.getMergeOperators(dag.getMeta(o2)).get(0); MockContainer mc5 = mockContainers.get(unifier.getContainer()); MockOperatorStats unifierp1mos = mc5.stats(unifier.getId()); unifierp1mos.currentWindowId(1).checkpointWindowId(1).deployState(DeployState.ACTIVE); mc5.sendHeartbeat(); o1p1mos.currentWindowId(2).deployState(DeployState.SHUTDOWN); mc1.sendHeartbeat(); scm.monitorHeartbeat(); Assert.assertEquals("committedWindowId", -1, scm.getCommittedWindowId()); scm.monitorHeartbeat(); // committedWindowId updated in next cycle Assert.assertEquals("committedWindowId", 1, scm.getCommittedWindowId()); scm.processEvents(); Assert.assertEquals( "containers at committedWindowId=1", 5, physicalPlan.getContainers().size()); // checkpoint window 2 o1p1mos.checkpointWindowId(2); mc1.sendHeartbeat(); scm.monitorHeartbeat(); Assert.assertEquals("committedWindowId", 1, scm.getCommittedWindowId()); o2p1mos.currentWindowId(2).checkpointWindowId(2); o2p2mos.currentWindowId(2).checkpointWindowId(2); o3p1mos.currentWindowId(2).checkpointWindowId(2); unifierp1mos.currentWindowId(2).checkpointWindowId(2); mc2.sendHeartbeat(); mc3.sendHeartbeat(); mc4.sendHeartbeat(); mc5.sendHeartbeat(); scm.monitorHeartbeat(); // Operators are shutdown when both operators reach window Id 2 Assert.assertEquals(0, o1p1.getContainer().getOperators().size()); Assert.assertEquals(0, o2p1.getContainer().getOperators().size()); Assert.assertEquals(0, physicalPlan.getContainers().size()); }
@Test public void testProcessHeartbeat() throws Exception { TestGeneratorInputOperator o1 = dag.addOperator("o1", TestGeneratorInputOperator.class); dag.setAttribute( o1, OperatorContext.STATS_LISTENERS, Arrays.asList(new StatsListener[] {new PartitioningTest.PartitionLoadWatch()})); dag.setAttribute(OperatorContext.STORAGE_AGENT, new MemoryStorageAgent()); StreamingContainerManager scm = new StreamingContainerManager(dag); PhysicalPlan plan = scm.getPhysicalPlan(); Assert.assertEquals("number required containers", 1, plan.getContainers().size()); PTOperator o1p1 = plan.getOperators(dag.getMeta(o1)).get(0); // assign container String containerId = "container1"; StreamingContainerAgent sca = scm.assignContainer( new ContainerResource(0, containerId, "localhost", 512, 0, null), InetSocketAddress.createUnresolved("localhost", 0)); Assert.assertNotNull(sca); Assert.assertEquals(PTContainer.State.ALLOCATED, o1p1.getContainer().getState()); Assert.assertEquals(PTOperator.State.PENDING_DEPLOY, o1p1.getState()); ContainerStats cstats = new ContainerStats(containerId); ContainerHeartbeat hb = new ContainerHeartbeat(); hb.setContainerStats(cstats); ContainerHeartbeatResponse chr = scm.processHeartbeat(hb); // get deploy request Assert.assertNotNull(chr.deployRequest); Assert.assertEquals("" + chr.deployRequest, 1, chr.deployRequest.size()); Assert.assertEquals(PTContainer.State.ACTIVE, o1p1.getContainer().getState()); Assert.assertEquals("state " + o1p1, PTOperator.State.PENDING_DEPLOY, o1p1.getState()); // first operator heartbeat OperatorHeartbeat ohb = new OperatorHeartbeat(); ohb.setNodeId(o1p1.getId()); ohb.setState(OperatorHeartbeat.DeployState.ACTIVE); OperatorStats stats = new OperatorStats(); stats.checkpoint = new Checkpoint(2, 0, 0); stats.windowId = 3; stats.outputPorts = Lists.newArrayList(); PortStats ps = new PortStats(TestGeneratorInputOperator.OUTPUT_PORT); ps.bufferServerBytes = 101; ps.tupleCount = 1; stats.outputPorts.add(ps); ohb.windowStats = 
Lists.newArrayList(stats); cstats.operators.add(ohb); scm.processHeartbeat(hb); // activate operator Assert.assertEquals(PTContainer.State.ACTIVE, o1p1.getContainer().getState()); Assert.assertEquals("state " + o1p1, PTOperator.State.ACTIVE, o1p1.getState()); Assert.assertEquals("tuples " + o1p1, 1, o1p1.stats.totalTuplesEmitted.get()); Assert.assertEquals("tuples " + o1p1, 0, o1p1.stats.totalTuplesProcessed.get()); Assert.assertEquals("window " + o1p1, 3, o1p1.stats.currentWindowId.get()); Assert.assertEquals("port stats", 1, o1p1.stats.outputPortStatusList.size()); PortStatus o1p1ps = o1p1.stats.outputPortStatusList.get(TestGeneratorInputOperator.OUTPUT_PORT); Assert.assertNotNull("port stats", o1p1ps); Assert.assertEquals("port stats", 1, o1p1ps.totalTuples); // second operator heartbeat stats = new OperatorStats(); stats.checkpoint = new Checkpoint(2, 0, 0); stats.windowId = 4; stats.outputPorts = Lists.newArrayList(); ps = new PortStats(TestGeneratorInputOperator.OUTPUT_PORT); ps.bufferServerBytes = 1; ps.tupleCount = 1; stats.outputPorts.add(ps); ohb.windowStats = Lists.newArrayList(stats); cstats.operators.clear(); cstats.operators.add(ohb); scm.processHeartbeat(hb); Assert.assertEquals("tuples " + o1p1, 2, o1p1.stats.totalTuplesEmitted.get()); Assert.assertEquals("window " + o1p1, 4, o1p1.stats.currentWindowId.get()); Assert.assertEquals("statsQueue " + o1p1, 2, o1p1.stats.listenerStats.size()); scm.processEvents(); Assert.assertEquals("statsQueue " + o1p1, 0, o1p1.stats.listenerStats.size()); Assert.assertEquals("lastStats " + o1p1, 2, o1p1.stats.lastWindowedStats.size()); }