Beispiel #1
0
  /**
   * Test serialization of the container manager with mock execution layer.
   *
   * <p>The test obtains a {@code StreamingContainerManager} through
   * {@code StreamingContainerManager.getInstance} with an {@code FSRecoveryHandler} for the
   * application path {@code testMeta.dir}. It then twice obtains a new manager from an empty
   * {@code LogicalPlan} that points at the same application path and asserts on the state found in
   * that new manager:
   *
   * <ol>
   *   <li>after the initial setup with a single operator {@code o1}: operator state, container id,
   *       container state, buffer server address and allocated memory;
   *   <li>after a container restart, a heartbeat, a logical plan modification (adding {@code o2}
   *       and a stream from {@code o1}), an operator state change and an additional checkpoint:
   *       operator count, operator state, stats call count, container state and the list of
   *       checkpoints.
   * </ol>
   *
   * @param agent storage agent set as {@code OperatorContext.STORAGE_AGENT} on the initial plan
   * @throws Exception if any call made by the test throws; nothing is caught here
   */
  private void testContainerManager(StorageAgent agent) throws Exception {
    FileUtils.deleteDirectory(new File(testMeta.dir)); // clean any state from previous run

    // initial plan: one operator, application path and storage agent set as plan attributes
    LogicalPlan dag = new LogicalPlan();
    dag.setAttribute(LogicalPlan.APPLICATION_PATH, testMeta.dir);
    dag.setAttribute(OperatorContext.STORAGE_AGENT, agent);

    StatsListeningOperator o1 = dag.addOperator("o1", StatsListeningOperator.class);

    FSRecoveryHandler recoveryHandler =
        new FSRecoveryHandler(dag.assertAppPath(), new Configuration(false));
    StreamingContainerManager scm =
        StreamingContainerManager.getInstance(recoveryHandler, dag, false);
    // the snapshot file is expected in the recovery handler's directory at this point
    File expFile = new File(recoveryHandler.getDir(), FSRecoveryHandler.FILE_SNAPSHOT);
    Assert.assertTrue("snapshot file " + expFile, expFile.exists());

    PhysicalPlan plan = scm.getPhysicalPlan();
    assertEquals("number required containers", 1, plan.getContainers().size());

    PTOperator o1p1 = plan.getOperators(dag.getMeta(o1)).get(0);

    @SuppressWarnings(
        "UnusedAssignment") /* sneaky: the constructor does some changes to the container */
    MockContainer mc = new MockContainer(scm, o1p1.getContainer());
    // kept for comparison with the container found after each restore
    PTContainer originalContainer = o1p1.getContainer();

    Assert.assertNotNull(o1p1.getContainer().bufferServerAddress);
    assertEquals(PTContainer.State.ACTIVE, o1p1.getContainer().getState());
    assertEquals("state " + o1p1, PTOperator.State.PENDING_DEPLOY, o1p1.getState());

    // test restore initial snapshot + log
    // the new plan only carries the application path; no operator is added to it here
    dag = new LogicalPlan();
    dag.setAttribute(LogicalPlan.APPLICATION_PATH, testMeta.dir);
    scm =
        StreamingContainerManager.getInstance(
            new FSRecoveryHandler(dag.assertAppPath(), new Configuration(false)), dag, false);
    // from here on work with the plans held by the new manager
    dag = scm.getLogicalPlan();
    plan = scm.getPhysicalPlan();

    o1p1 = plan.getOperators(dag.getOperatorMeta("o1")).get(0);
    assertEquals("post restore state " + o1p1, PTOperator.State.PENDING_DEPLOY, o1p1.getState());
    o1 = (StatsListeningOperator) o1p1.getOperatorMeta().getOperator();
    assertEquals(
        "containerId", originalContainer.getExternalId(), o1p1.getContainer().getExternalId());
    assertEquals("stats listener", 1, o1p1.statsListeners.size());
    assertEquals("number stats calls", 0, o1.processStatsCnt); // stats are not logged
    // container state differs from the ACTIVE state asserted before the restore
    assertEquals("post restore 1", PTContainer.State.ALLOCATED, o1p1.getContainer().getState());
    assertEquals(
        "post restore 1",
        originalContainer.bufferServerAddress,
        o1p1.getContainer().bufferServerAddress);

    // the agent for the original container id must be available from the new manager
    StreamingContainerAgent sca = scm.getContainerAgent(originalContainer.getExternalId());
    Assert.assertNotNull("allocated container restored " + originalContainer, sca);
    assertEquals(
        "memory usage allocated container",
        (int) OperatorContext.MEMORY_MB.defaultValue,
        sca.container.getAllocatedMemoryMB());

    // YARN-1490 - simulate container terminated on AM recovery
    scm.scheduleContainerRestart(originalContainer.getExternalId());
    assertEquals("memory usage of failed container", 0, sca.container.getAllocatedMemoryMB());

    // new mock container for the restored operator's container; write a checkpoint for
    // window 3 and report the operator ACTIVE at window 3 with a heartbeat
    Checkpoint firstCheckpoint = new Checkpoint(3, 0, 0);
    mc = new MockContainer(scm, o1p1.getContainer());
    checkpoint(scm, o1p1, firstCheckpoint);
    mc.stats(o1p1.getId())
        .deployState(OperatorHeartbeat.DeployState.ACTIVE)
        .currentWindowId(3)
        .checkpointWindowId(3);
    mc.sendHeartbeat();
    assertEquals("state " + o1p1, PTOperator.State.ACTIVE, o1p1.getState());

    // logical plan modification triggers snapshot
    // requests: create operator o2 and a stream from o1.outport to o2.inport1
    CreateOperatorRequest cor = new CreateOperatorRequest();
    cor.setOperatorFQCN(GenericTestOperator.class.getName());
    cor.setOperatorName("o2");
    CreateStreamRequest csr = new CreateStreamRequest();
    csr.setSourceOperatorName("o1");
    csr.setSourceOperatorPortName("outport");
    csr.setSinkOperatorName("o2");
    csr.setSinkOperatorPortName("inport1");
    FutureTask<?> lpmf = scm.logicalPlanModification(Lists.newArrayList(cor, csr));
    // keep calling monitorHeartbeat() until the returned future reports done
    while (!lpmf.isDone()) {
      scm.monitorHeartbeat();
    }
    Assert.assertNull(lpmf.get()); // unmask exception, if any

    // the modification is expected on the same plan instance the manager already holds
    Assert.assertSame("dag references", dag, scm.getLogicalPlan());
    assertEquals("number operators after plan modification", 2, dag.getAllOperators().size());

    // set operator state triggers journal write
    o1p1.setState(PTOperator.State.INACTIVE);

    Checkpoint offlineCheckpoint = new Checkpoint(10, 0, 0);
    // write checkpoint while AM is out,
    // it needs to be picked up as part of restore
    checkpoint(scm, o1p1, offlineCheckpoint);

    // test restore
    // again start from an empty plan that only carries the application path
    dag = new LogicalPlan();
    dag.setAttribute(LogicalPlan.APPLICATION_PATH, testMeta.dir);
    scm =
        StreamingContainerManager.getInstance(
            new FSRecoveryHandler(dag.assertAppPath(), new Configuration(false)), dag, false);

    // the manager must hold a different plan instance than the empty one passed in
    Assert.assertNotSame("dag references", dag, scm.getLogicalPlan());
    assertEquals(
        "number operators after restore", 2, scm.getLogicalPlan().getAllOperators().size());

    dag = scm.getLogicalPlan();
    plan = scm.getPhysicalPlan();

    o1p1 = plan.getOperators(dag.getOperatorMeta("o1")).get(0);
    // the INACTIVE state set before this restore is expected to be present
    assertEquals("post restore state " + o1p1, PTOperator.State.INACTIVE, o1p1.getState());
    o1 = (StatsListeningOperator) o1p1.getOperatorMeta().getOperator();
    assertEquals("stats listener", 1, o1p1.statsListeners.size());
    assertEquals("number stats calls post restore", 1, o1.processStatsCnt);
    assertEquals("post restore 1", PTContainer.State.ACTIVE, o1p1.getContainer().getState());
    assertEquals(
        "post restore 1",
        originalContainer.bufferServerAddress,
        o1p1.getContainer().bufferServerAddress);

    // offline checkpoint detection
    // both checkpoints are expected, in the order they were written
    assertEquals(
        "checkpoints after recovery",
        Lists.newArrayList(firstCheckpoint, offlineCheckpoint),
        o1p1.checkpoints);
  }
  /**
   * Verifies how the container manager reacts when the first operator of a chain reports
   * {@code DeployState.SHUTDOWN}.
   *
   * <p>The logical plan is {@code o1 -> o2 -> o3}, with a {@code StatelessPartitioner} constructed
   * with {@code 2} set on {@code o2}. The physical operators used by the test ({@code o1}, both
   * partitions of {@code o2}, {@code o3} and the first merge operator of {@code o2}) are first
   * reported ACTIVE at window 1. Then {@code o1} reports SHUTDOWN at window 2. The test checks the
   * committed window id and the container count along the way, and expects the physical plan to
   * hold no containers once the other operators have reported window 2 as well.
   */
  @Test
  public void testOperatorShutdown() {
    dag.setAttribute(OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());

    // logical plan: upstream (o1) -> partitioned (o2) -> downstream (o3)
    GenericTestOperator upstream = dag.addOperator("o1", GenericTestOperator.class);
    GenericTestOperator partitioned = dag.addOperator("o2", GenericTestOperator.class);
    GenericTestOperator downstream = dag.addOperator("o3", GenericTestOperator.class);

    dag.addStream("stream1", upstream.outport1, partitioned.inport1);
    dag.addStream("stream2", partitioned.outport1, downstream.inport1);

    dag.setAttribute(
        partitioned,
        OperatorContext.PARTITIONER,
        new StatelessPartitioner<GenericTestOperator>(2));
    StreamingContainerManager scm = new StreamingContainerManager(dag);

    // one mock container per container of the physical plan
    PhysicalPlan plan = scm.getPhysicalPlan();
    Map<PTContainer, MockContainer> mocksByContainer = new HashMap<>();
    for (PTContainer container : plan.getContainers()) {
      mocksByContainer.put(container, new MockContainer(scm, container));
    }

    // deploy every mock container
    for (MockContainer mock : mocksByContainer.values()) {
      mock.deploy();
    }
    // clearing the address skips the buffer server purge in monitorHeartbeat
    for (PTContainer container : mocksByContainer.keySet()) {
      container.bufferServerAddress = null;
    }

    // o1: report ACTIVE at window 1
    PTOperator upstreamOper = plan.getOperators(dag.getMeta(upstream)).get(0);
    MockContainer upstreamMock = mocksByContainer.get(upstreamOper.getContainer());
    MockOperatorStats upstreamStats = upstreamMock.stats(upstreamOper.getId());
    upstreamStats
        .currentWindowId(1)
        .checkpointWindowId(1)
        .deployState(DeployState.ACTIVE);
    upstreamMock.sendHeartbeat();

    // o2, first partition: report ACTIVE at window 1
    PTOperator firstPartition = plan.getOperators(dag.getMeta(partitioned)).get(0);
    MockContainer firstPartitionMock = mocksByContainer.get(firstPartition.getContainer());
    MockOperatorStats firstPartitionStats = firstPartitionMock.stats(firstPartition.getId());
    firstPartitionStats
        .currentWindowId(1)
        .checkpointWindowId(1)
        .deployState(DeployState.ACTIVE);
    firstPartitionMock.sendHeartbeat();

    Assert.assertEquals("2 partitions", 2, plan.getOperators(dag.getMeta(partitioned)).size());

    // o2, second partition: report ACTIVE at window 1
    PTOperator secondPartition = plan.getOperators(dag.getMeta(partitioned)).get(1);
    MockContainer secondPartitionMock = mocksByContainer.get(secondPartition.getContainer());
    MockOperatorStats secondPartitionStats = secondPartitionMock.stats(secondPartition.getId());
    secondPartitionStats
        .currentWindowId(1)
        .checkpointWindowId(1)
        .deployState(DeployState.ACTIVE);
    secondPartitionMock.sendHeartbeat();

    // o3: report ACTIVE at window 1
    PTOperator downstreamOper = plan.getOperators(dag.getMeta(downstream)).get(0);
    MockContainer downstreamMock = mocksByContainer.get(downstreamOper.getContainer());
    MockOperatorStats downstreamStats = downstreamMock.stats(downstreamOper.getId());
    downstreamStats
        .currentWindowId(1)
        .checkpointWindowId(1)
        .deployState(DeployState.ACTIVE);
    downstreamMock.sendHeartbeat();

    // first merge operator (unifier) of o2: report ACTIVE at window 1
    PTOperator merger = plan.getMergeOperators(dag.getMeta(partitioned)).get(0);
    MockContainer mergerMock = mocksByContainer.get(merger.getContainer());
    MockOperatorStats mergerStats = mergerMock.stats(merger.getId());
    mergerStats
        .currentWindowId(1)
        .checkpointWindowId(1)
        .deployState(DeployState.ACTIVE);
    mergerMock.sendHeartbeat();

    // o1 moves to window 2 and reports SHUTDOWN
    upstreamStats.currentWindowId(2).deployState(DeployState.SHUTDOWN);
    upstreamMock.sendHeartbeat();
    scm.monitorHeartbeat();
    Assert.assertEquals("committedWindowId", -1, scm.getCommittedWindowId());
    // the committed window id is only updated in the following monitor cycle
    scm.monitorHeartbeat();
    Assert.assertEquals("committedWindowId", 1, scm.getCommittedWindowId());
    scm.processEvents();
    Assert.assertEquals("containers at committedWindowId=1", 5, plan.getContainers().size());

    // o1 reports a checkpoint for window 2
    upstreamStats.checkpointWindowId(2);
    upstreamMock.sendHeartbeat();
    scm.monitorHeartbeat();

    Assert.assertEquals("committedWindowId", 1, scm.getCommittedWindowId());

    // the remaining operators report window 2 as current and checkpointed
    firstPartitionStats.currentWindowId(2).checkpointWindowId(2);
    secondPartitionStats.currentWindowId(2).checkpointWindowId(2);
    downstreamStats.currentWindowId(2).checkpointWindowId(2);
    mergerStats.currentWindowId(2).checkpointWindowId(2);
    firstPartitionMock.sendHeartbeat();
    secondPartitionMock.sendHeartbeat();
    downstreamMock.sendHeartbeat();
    mergerMock.sendHeartbeat();
    scm.monitorHeartbeat();

    // Operators are shutdown when both operators reach window Id 2
    Assert.assertEquals(0, upstreamOper.getContainer().getOperators().size());
    Assert.assertEquals(0, firstPartition.getContainer().getOperators().size());
    Assert.assertEquals(0, plan.getContainers().size());
  }