private void testRestartApp(StorageAgent agent, String appPath1) throws Exception { FileUtils.deleteDirectory(new File(testMeta.dir)); // clean any state from previous run String appId1 = "app1"; String appId2 = "app2"; String appPath2 = testMeta.dir + "/" + appId2; LogicalPlan dag = new LogicalPlan(); dag.setAttribute(LogicalPlan.APPLICATION_ID, appId1); dag.setAttribute(LogicalPlan.APPLICATION_PATH, appPath1); dag.setAttribute(LogicalPlan.APPLICATION_ATTEMPT_ID, 1); dag.setAttribute(OperatorContext.STORAGE_AGENT, agent); dag.addOperator("o1", StatsListeningOperator.class); FSRecoveryHandler recoveryHandler = new FSRecoveryHandler(dag.assertAppPath(), new Configuration(false)); StreamingContainerManager.getInstance(recoveryHandler, dag, false); // test restore initial snapshot + log dag = new LogicalPlan(); dag.setAttribute(LogicalPlan.APPLICATION_PATH, appPath1); StreamingContainerManager scm = StreamingContainerManager.getInstance( new FSRecoveryHandler(dag.assertAppPath(), new Configuration(false)), dag, false); PhysicalPlan plan = scm.getPhysicalPlan(); dag = plan.getLogicalPlan(); // original plan Assert.assertNotNull("operator", dag.getOperatorMeta("o1")); PTOperator o1p1 = plan.getOperators(dag.getOperatorMeta("o1")).get(0); long[] ids = new FSStorageAgent(appPath1 + "/" + LogicalPlan.SUBDIR_CHECKPOINTS, new Configuration()) .getWindowIds(o1p1.getId()); Assert.assertArrayEquals(new long[] {o1p1.getRecoveryCheckpoint().getWindowId()}, ids); Assert.assertNull(o1p1.getContainer().getExternalId()); // trigger journal write o1p1.getContainer().setExternalId("cid1"); scm.writeJournal(o1p1.getContainer().getSetContainerState()); dag = new LogicalPlan(); dag.setAttribute(LogicalPlan.APPLICATION_PATH, appPath2); dag.setAttribute(LogicalPlan.APPLICATION_ID, appId2); StramClient sc = new StramClient(new Configuration(), dag); try { sc.start(); sc.copyInitialState(new Path(appPath1)); } finally { sc.stop(); } scm = StreamingContainerManager.getInstance( new FSRecoveryHandler(dag.assertAppPath(), new Configuration(false)), dag, false); plan = scm.getPhysicalPlan(); dag = plan.getLogicalPlan(); assertEquals("modified appId", appId2, dag.getValue(LogicalPlan.APPLICATION_ID)); assertEquals("modified appPath", appPath2, dag.getValue(LogicalPlan.APPLICATION_PATH)); Assert.assertNotNull("operator", dag.getOperatorMeta("o1")); o1p1 = plan.getOperators(dag.getOperatorMeta("o1")).get(0); assertEquals("journal copied", "cid1", o1p1.getContainer().getExternalId()); ids = new FSStorageAgent(appPath2 + "/" + LogicalPlan.SUBDIR_CHECKPOINTS, new Configuration()) .getWindowIds(o1p1.getId()); Assert.assertArrayEquals( "checkpoints copied", new long[] {o1p1.getRecoveryCheckpoint().getWindowId()}, ids); }
/** * Create deploy info for operator. * * <p> * * @return {@link com.datatorrent.stram.api.OperatorDeployInfo} */ private OperatorDeployInfo createOperatorDeployInfo(PTOperator oper) { OperatorDeployInfo ndi; if (oper.isUnifier()) { UnifierDeployInfo udi = new UnifierDeployInfo(); /* the constructor auto sets the type */ try { udi.operatorAttributes = oper.getUnifiedOperatorMeta().getAttributes().clone(); } catch (CloneNotSupportedException ex) { throw new RuntimeException("Cannot clone unifier attributes", ex); } ndi = udi; } else { ndi = new OperatorDeployInfo(); Operator operator = oper.getOperatorMeta().getOperator(); if (operator instanceof InputOperator) { ndi.type = OperatorType.INPUT; if (!oper.getInputs().isEmpty()) { // If there are no input ports then it has to be an input operator. But if there are input // ports then // we check if any input port is connected which would make it a Generic operator. for (PTOperator.PTInput ptInput : oper.getInputs()) { if (ptInput.logicalStream != null && ptInput.logicalStream.getSource() != null) { ndi.type = OperatorType.GENERIC; break; } } } } else { ndi.type = OperatorType.GENERIC; } } Checkpoint checkpoint = oper.getRecoveryCheckpoint(); ProcessingMode pm = oper.getOperatorMeta().getValue(OperatorContext.PROCESSING_MODE); if (pm == ProcessingMode.AT_MOST_ONCE || pm == ProcessingMode.EXACTLY_ONCE) { // TODO: following should be handled in the container at deploy time // for exactly once container should also purge previous checkpoint // whenever new checkpoint is written. StorageAgent agent = oper.getOperatorMeta().getAttributes().get(OperatorContext.STORAGE_AGENT); if (agent == null) { agent = initCtx.getValue(OperatorContext.STORAGE_AGENT); } // pick checkpoint most recently written try { long[] windowIds = agent.getWindowIds(oper.getId()); long checkpointId = Stateless.WINDOW_ID; for (long windowId : windowIds) { if (windowId > checkpointId) { checkpointId = windowId; } } if (checkpoint == null || checkpoint.windowId != checkpointId) { checkpoint = new Checkpoint(checkpointId, 0, 0); } } catch (Exception e) { throw new RuntimeException("Failed to determine checkpoint window id " + oper, e); } } LOG.debug("{} recovery checkpoint {}", oper, checkpoint); ndi.checkpoint = checkpoint; ndi.name = oper.getOperatorMeta().getName(); ndi.id = oper.getId(); try { // clone map before modifying it ndi.contextAttributes = oper.getOperatorMeta().getAttributes().clone(); } catch (CloneNotSupportedException ex) { throw new RuntimeException("Cannot clone operator attributes", ex); } if (oper.isOperatorStateLess()) { ndi.contextAttributes.put(OperatorContext.STATELESS, true); } return ndi; }