/**
   * Tests the API: RepImpl.setBackupProhibited(true) must make
   * DbBackup.startBackup fail with a LogOverwriteException (a state that may
   * be caused by Replay.rollback()), and a backup must work again once the
   * prohibition is lifted.
   *
   * <p>The environment is shut down from a single outer finally block so it
   * is released no matter which phase fails. Unexpected exceptions from the
   * second phase propagate unchanged, so the test report shows the real cause
   * and stack trace instead of a bare fail() message.
   *
   * @throws Throwable if any step fails unexpectedly
   */
  @Test
  public void testRollingBackDbBackupAPI() throws Throwable {

    RepEnvInfo[] repEnvInfo = RepTestUtils.setupEnvInfos(envRoot, 1);
    ReplicatedEnvironment master = RepTestUtils.joinGroup(repEnvInfo);

    try {
      RepImpl repImpl = RepInternal.getRepImpl(master);
      DbBackup backupHelper = new DbBackup(master);

      /* Phase 1: while backups are prohibited, startBackup must be rejected. */
      repImpl.setBackupProhibited(true);
      try {
        backupHelper.startBackup();
        fail("Should throw out a LogOverwriteException here.");
      } catch (LogOverwriteException expected) {
        /* Expect a LogOverwriteException here. */
      }

      /*
       * Phase 2: with the prohibition lifted, a full start/end cycle must
       * succeed. Any exception here fails the test with its original cause.
       */
      repImpl.setBackupProhibited(false);
      backupHelper.startBackup();
      backupHelper.endBackup();
    } finally {
      /* Always release the environment, whichever phase failed. */
      RepTestUtils.shutdownRepEnvs(repEnvInfo);
    }
  }
  /**
   * Asserts that the last item in the master's replicated stream is not a matchpoint candidate,
   * i.e. that VLSNRange.lastVLSN differs from VLSNRange.lastSync. Having the two coincide is not
   * wrong in itself; callers use this check to make sure they are exercising partial rollbacks.
   *
   * @param master the environment whose VLSN index range is inspected
   * @param repEnvInfo not read by this method
   * @return lastVLSN
   * @throws InterruptedException declared in the signature; nothing in this body throws it
   *     directly
   */
  private VLSN ensureDistinctLastAndSyncVLSN(ReplicatedEnvironment master, RepEnvInfo[] repEnvInfo)
      throws InterruptedException {

    final VLSNRange currentRange = RepInternal.getRepImpl(master).getVLSNIndex().getRange();
    final VLSN last = currentRange.getLast();
    final VLSN lastSync = currentRange.getLastSync();

    /* Report both values so a failure shows exactly where the stream ended. */
    final String detail = "lastVLSN = " + last + " syncVLSN = " + lastSync;
    assertFalse(detail, last.equals(lastSync));

    return last;
  }
  /*
   * Start the master (the helper node) first, then the remaining nodes in
   * index order. Node 0 must come up as MASTER and every later node as
   * REPLICA, and no node may have run an election along the way.
   */
  @Test
  public void testGroupCreateMasterFirst() throws DatabaseException {

    for (int nodeIdx = 0; nodeIdx < repEnvInfo.length; nodeIdx++) {
      final ReplicatedEnvironment env = repEnvInfo[nodeIdx].openEnv();
      final State actual = env.getState();

      final State expected;
      if (nodeIdx == 0) {
        expected = State.MASTER;
      } else {
        expected = State.REPLICA;
      }
      assertEquals(expected, actual);

      /* No elections, helper nodes or members queried for master. */
      final RepNode node = RepInternal.getRepImpl(env).getRepNode();
      assertEquals(0, node.getElections().getElectionCount());
    }
  }
  /**
   * Crashes the current master and re-joins the group so that a new master is chosen.
   *
   * @param master the current master; its node id minus one is used as its index into {@code
   *     repEnvInfo}
   * @param repEnvInfo the group's environment descriptors
   * @return the environment returned by {@code RepTestUtils.openRepEnvsJoin}, logged here as the
   *     new master
   */
  private ReplicatedEnvironment crashMasterAndElectNewMaster(
      ReplicatedEnvironment master, RepEnvInfo[] repEnvInfo) {

    /* Resolve the slot before the crash, while the handle is still usable. */
    final int crashedSlot = RepInternal.getNodeId(master) - 1;

    logger.info("Crashing " + master.getNodeName());
    final RepEnvInfo crashedInfo = repEnvInfo[crashedSlot];
    crashedInfo.abnormalCloseEnv();

    logger.info("Rejoining");
    final ReplicatedEnvironment elected = RepTestUtils.openRepEnvsJoin(repEnvInfo);

    logger.info("New master = " + elected.getNodeName());
    return elected;
  }
  /*
   * Tests internal node removal APIs: replicas removed from the group by the
   * master must be rejected when they try to rejoin under their old node
   * names, but may join again under new names.
   */
  @Test
  public void testRemoveMember() {
    createGroup(groupSize);
    ReplicatedEnvironment masterEnv = repEnvInfo[0].getEnv();
    assertTrue(masterEnv.getState().isMaster());

    RepNode masterNode = repEnvInfo[0].getRepNode();

    /* Shrink the group one replica at a time, all the way down to one. */
    for (int idx = 1; idx < groupSize; idx++) {
      final ReplicatedEnvironment replicaEnv = repEnvInfo[idx].getEnv();
      assertTrue(!RepInternal.isClosed(replicaEnv));
      masterNode.removeMember(replicaEnv.getNodeName());
      assertEquals((groupSize - idx), masterNode.getGroup().getElectableGroupSize());
    }

    /* Close the replica handles, highest index first. */
    int closing = groupSize - 1;
    while (closing > 0) {
      repEnvInfo[closing].closeEnv();
      closing--;
    }

    /* Re-opening under the removed node names should fail. */
    for (int idx = 1; idx < groupSize; idx++) {
      try {
        repEnvInfo[idx].openEnv();
        fail("Exception expected");
      } catch (EnvironmentFailureException rejected) {
        /* Expected, the master should reject the attempt. */
        assertEquals(EnvironmentFailureReason.HANDSHAKE_ERROR, rejected.getReason());
      }
    }

    /* With fresh log files and a different node name, joining should be ok. */
    for (int idx = 1; idx < groupSize; idx++) {
      final RepEnvInfo replacement = repEnvInfo[idx];
      final ReplicationConfig replacementConfig = replacement.getRepConfig();
      TestUtils.removeLogFiles("RemoveRepEnvironments", replacement.getEnvHome(), false);

      replacementConfig.setNodeName("ReplaceNode_" + idx);
      replacement.openEnv();
      assertEquals(idx + 1, masterNode.getGroup().getElectableGroupSize());
    }
    masterEnv.close();
  }
  /**
   * Tests the API: RepImpl.invalidateBackups must make an already started
   * DbBackup fail at endBackup with a LogOverwriteException (a state that may
   * be caused by Replay.rollback()).
   *
   * <p>The invalidation is injected through a DbBackup test hook whose
   * doHook() calls invalidateBackups(8L); the other hook methods are not
   * expected to be used and throw UnsupportedOperationException.
   *
   * <p>Everything after the environment is opened runs inside one
   * try/finally, so the environment is shut down even if startBackup or the
   * hook installation fails, not only when endBackup does.
   *
   * @throws Exception if any step fails unexpectedly
   */
  @Test
  public void testRollBackInvalidateDbBackup() throws Exception {

    RepEnvInfo[] repEnvInfo = RepTestUtils.setupEnvInfos(envRoot, 1);
    ReplicatedEnvironment master = RepTestUtils.joinGroup(repEnvInfo);

    try {
      final RepImpl repImpl = RepInternal.getRepImpl(master);

      DbBackup backupHelper = new DbBackup(master);
      backupHelper.startBackup();

      /* Invalidate the running backup from inside the hook. */
      backupHelper.setTestHook(
          new TestHook<Object>() {
            public void doHook() {
              repImpl.invalidateBackups(8L);
            }

            public Object getHookValue() {
              throw new UnsupportedOperationException();
            }

            public void doIOHook() {
              throw new UnsupportedOperationException();
            }

            public void hookSetup() {
              throw new UnsupportedOperationException();
            }

            public void doHook(Object obj) {
              throw new UnsupportedOperationException();
            }
          });

      try {
        backupHelper.endBackup();
        fail("Should throw out a LogOverwriteException here.");
      } catch (LogOverwriteException expected) {
        /* Expect to get a LogOverwriteException here. */
      }
    } finally {
      /* Always release the environment, whichever step failed. */
      RepTestUtils.shutdownRepEnvs(repEnvInfo);
    }
  }
  /*
   * Tests internal node deletion APIs: each replica is closed and then
   * deleted from the group through the master (removeMember with its second
   * argument set to true), after which it is opened again under the same
   * node name.
   */
  @Test
  public void testDeleteMember() {
    createGroup(groupSize);
    assertTrue(repEnvInfo[0].getEnv().getState().isMaster());

    final RepNode masterNode = repEnvInfo[0].getRepNode();

    /* Reduce the group size all the way down to one. */
    for (int idx = 1; idx < groupSize; idx++) {
      final ReplicatedEnvironment replicaEnv = repEnvInfo[idx].getEnv();
      assertTrue(!RepInternal.isClosed(replicaEnv));

      /* Capture the name before closing the handle, then delete the node. */
      final String deletedName = replicaEnv.getNodeName();
      repEnvInfo[idx].closeEnv();
      masterNode.removeMember(deletedName, true);
      assertEquals((groupSize - idx), masterNode.getGroup().getElectableGroupSize());
    }

    /*
     * Attempting to re-open them with the same node names should succeed
     */
    int reopening = 1;
    while (reopening < groupSize) {
      repEnvInfo[reopening].openEnv();
      reopening++;
    }
  }
  /*
   * Just check if the replica is in sync: read the master's last VLSN and
   * sync the group to it, passing 2 as the node count.
   */
  private void waitForReplicaToSync(ReplicatedEnvironment master, RepEnvInfo[] repEnvInfo)
      throws InterruptedException {

    final VLSN masterLast =
        RepInternal.getRepImpl(master).getVLSNIndex().getRange().getLast();

    RepTestUtils.syncGroupToVLSN(repEnvInfo, 2, masterLast);
  }
  /**
   * Runs a steady workload on a master that never dies while its two replicas take turns
   * crashing and rejoining. No rollbacks should occur, but no data should be lost either.
   *
   * <p>TODO: Should the workload param be of a different class (not a RollbackWorkload), since its
   * masterSteadyWork method is only called here?
   *
   * @param workload supplies the master-side work and the final data check
   * @param numIterations number of crash/rejoin rounds; asserted to be even
   * @throws Throwable anything raised by the scenario, rethrown after its stack trace is printed
   */
  private void replicasDieAndRejoin(RollbackWorkload workload, int numIterations) throws Throwable {

    try {
      /* Create a  3 node group. Assign identities. */
      final RepEnvInfo[] groupInfo = RepTestUtils.setupEnvInfos(envRoot, 3);
      final ReplicatedEnvironment master = RepTestUtils.joinGroup(groupInfo);
      logger.severe("master=" + master);

      /* The first non-master becomes replicaA; any later one becomes replicaB. */
      RepEnvInfo replicaA = null;
      RepEnvInfo replicaB = null;
      for (RepEnvInfo candidate : groupInfo) {
        if (!candidate.getEnv().getState().isMaster()) {
          if (replicaA == null) {
            replicaA = candidate;
          } else {
            replicaB = candidate;
          }
        }
      }

      /*
       * For the sake of easy test writing, make sure numIterations is an
       * even number.
       */
      assertTrue((numIterations % 2) == 0);

      /* Start with replicaA down; each round swaps which replica is up. */
      replicaA.abnormalCloseEnv();
      for (int round = 0; round < numIterations; round++) {
        workload.masterSteadyWork(master);
        waitForReplicaToSync(master, groupInfo);

        final boolean evenRound = (round % 2) == 0;
        final RepEnvInfo goingDown = evenRound ? replicaB : replicaA;
        final RepEnvInfo comingUp = evenRound ? replicaA : replicaB;
        flushLogAndCrash(goingDown);
        comingUp.openEnv();

        waitForReplicaToSync(master, groupInfo);
      }

      /* After an even number of rounds replicaA is the one that is down. */
      replicaA.openEnv();

      final VLSN finalVLSN = RepInternal.getRepImpl(master).getVLSNIndex().getRange().getLast();
      RepTestUtils.syncGroupToVLSN(groupInfo, groupInfo.length, finalVLSN);

      assertTrue(workload.containsAllData(master));
      RepTestUtils.checkNodeEquality(finalVLSN, verbose, groupInfo);

      workload.close();

      /*
       * We're done with the test. Bringing down these replicators
       * forcibly, without closing transactions and whatnot.
       */
      for (RepEnvInfo node : groupInfo) {
        node.abnormalCloseEnv();
      }
    } catch (Throwable failure) {
      failure.printStackTrace();
      throw failure;
    }
  }
  /* Example no. 10 */
  /**
   * Creates a 3 node group and replicates operations, then kills the master and lets the other
   * two nodes carry on. When the old master comes back this requires a syncup and a rollback of
   * any operations after the matchpoint. Afterwards the node that was never master is crashed
   * and recovered, and finally the whole group is restarted and verified once more.
   *
   * @param workload supplies the work done at each stage and the data checks
   * @throws Throwable anything raised by the scenario, rethrown after its stack trace is printed
   */
  private void masterDiesAndRejoins(RollbackWorkload workload) throws Throwable {

    try {
      /* Create a  3 node group */
      final RepEnvInfo[] groupInfo = RepTestUtils.setupEnvInfos(envRoot, 3);
      ReplicatedEnvironment master = RepTestUtils.joinGroup(groupInfo);
      logger.severe("master=" + master);

      /*
       * Run a workload against the master. Sync up the group and check
       * that all nodes have the same contents. This first workload must
       * end with in-progress, uncommitted transactions.
       */
      workload.beforeMasterCrash(master);
      VLSN checkpointVLSN = VLSN.NULL_VLSN;
      if (workload.noLockConflict()) {
        checkpointVLSN = checkIfWholeGroupInSync(master, groupInfo, workload);
      }

      /*
       * Crash the master, find a new master. Node ids are one-based, so
       * id - 1 is the node's slot in groupInfo.
       */
      final RepEnvInfo formerMaster = groupInfo[RepInternal.getNodeId(master) - 1];
      master = crashMasterAndElectNewMaster(master, groupInfo);
      final RepEnvInfo currentMaster = groupInfo[RepInternal.getNodeId(master) - 1];
      logger.severe("newmaster=" + master);

      /* The first node that is neither the old nor the new master. */
      RepEnvInfo neverMaster = null;
      for (RepEnvInfo candidate : groupInfo) {
        if ((candidate == formerMaster) || (candidate == currentMaster)) {
          continue;
        }
        neverMaster = candidate;
        break;
      }

      /*
       * Check that the remaining two nodes only contain committed
       * updates.
       * TODO: check that the number of group members is 2.
       */
      assertTrue(workload.containsSavedData(master));
      RepTestUtils.checkNodeEquality(checkpointVLSN, verbose, groupInfo);

      /*
       * Do some work against the new master, while the old master is
       * asleep. Note that the first workload may have contained
       * in-flight transactions, so this may result in the rollback of
       * some transactions in the first workload.
       */
      workload.afterMasterCrashBeforeResumption(master);

      /*
       * The intent of this test is that the work after crash will end on
       * an incomplete txn. Check for that.
       */
      checkpointVLSN = ensureDistinctLastAndSyncVLSN(master, groupInfo);

      /* Now bring up the old master. */
      logger.info("Bring up old master");
      formerMaster.openEnv();

      logger.info("Old master joined");
      RepTestUtils.syncGroupToVLSN(groupInfo, groupInfo.length, checkpointVLSN);
      logger.info("Old master synced");

      /*
       * Check that all nodes only contain committed updates.
       */
      workload.releaseDbLocks();
      assertTrue(workload.containsSavedData(master));
      RepTestUtils.checkNodeEquality(checkpointVLSN, verbose, groupInfo);

      /*
       * Now crash the node that has never been a master. Do some work
       * without it, then recover that node, then do a verification
       * check.  This exercises the recovery of a log that has syncups in
       * it.
       */
      neverMaster.abnormalCloseEnv();
      workload.afterReplicaCrash(master);

      checkpointVLSN = RepInternal.getRepImpl(master).getVLSNIndex().getRange().getLast();
      RepTestUtils.syncGroupToVLSN(groupInfo, 2, checkpointVLSN);
      neverMaster.openEnv();
      RepTestUtils.syncGroupToVLSN(groupInfo, 3, checkpointVLSN);
      assertTrue(workload.containsSavedData(master));
      RepTestUtils.checkNodeEquality(checkpointVLSN, verbose, groupInfo);
      RepTestUtils.checkUtilizationProfile(groupInfo);

      workload.close();

      /*
       * We're done with the test. Bringing down these replicators
       * forcibly, without closing transactions and whatnot.
       */
      for (RepEnvInfo node : groupInfo) {
        node.abnormalCloseEnv();
      }

      /*
       * Open and verify the environments one last time, to ensure that
       * rollbacks in the recovery interval don't cause problems.
       */
      master = RepTestUtils.restartGroup(groupInfo);
      checkpointVLSN = RepInternal.getRepImpl(master).getVLSNIndex().getRange().getLast();
      RepTestUtils.syncGroupToVLSN(groupInfo, 3, checkpointVLSN);
      RepTestUtils.checkNodeEquality(checkpointVLSN, verbose, groupInfo);

      /* Final close. */
      for (RepEnvInfo node : groupInfo) {
        node.closeEnv();
      }
    } catch (Throwable failure) {
      failure.printStackTrace();
      throw failure;
    }
  }
  /**
   * Test that a timeout in the feeder while attempting to read the group database because other
   * feeders have it write locked causes the feeder (and replica) to fail, but allows the master to
   * continue operating. [#23822]
   *
   * <p>The loop runs twice, for node indexes 1 and 2. Each pass takes a write (RMW) lock on the
   * group record through a MasterTxn, sleeps past the read timeout, and then lets a RepNodeThread
   * try to join while the lock is still held. Pass 1 uses the node's existing configuration and
   * expects the join to fail until the lock is released; pass 2 switches the node to SECONDARY
   * and expects the join to succeed even with the lock held.
   *
   * <p>NOTE(review): RepNodeThread is defined outside this view; it presumably opens {@code
   * repEnvInfo[i]} and records any failure in its {@code te} field. The meaning of its second
   * constructor argument is not visible here. Confirm against its definition.
   *
   * @throws DatabaseException if an environment or database operation fails
   * @throws InterruptedException if the sleep or a thread join is interrupted
   */
  @Test
  public void testJoinGroupReadGroupTimeout() throws DatabaseException, InterruptedException {

    /* Start first node as master */
    ReplicatedEnvironment repEnv = repEnvInfo[0].openEnv();
    assertEquals("Master node state", State.MASTER, repEnv.getState());

    RepImpl repImpl = RepInternal.getRepImpl(repEnv);

    /* i is both the pass number and the index of the node that tries to join. */
    for (int i = 1; i <= 2; i++) {

      /* Get a write lock on the RepGroupDB */
      final MasterTxn txn =
          new MasterTxn(
              repImpl,
              new TransactionConfig()
                  .setDurability(
                      new Durability(
                          SyncPolicy.SYNC, SyncPolicy.SYNC, ReplicaAckPolicy.SIMPLE_MAJORITY)),
              repImpl.getNameIdPair());
      final DatabaseImpl groupDbImpl = repImpl.getGroupDb();
      final DatabaseEntry value = new DatabaseEntry();
      final Cursor cursor = DbInternal.makeCursor(groupDbImpl, txn, new CursorConfig());
      /* Reading the group key with RMW is what takes the write lock. */
      final OperationStatus status = cursor.getNext(RepGroupDB.groupKeyEntry, value, LockMode.RMW);
      assertEquals(i + ": Lock group result", OperationStatus.SUCCESS, status);

      /* Wait longer than the default 500 ms read timeout */
      Thread.sleep(600);

      /* Test both electable and secondary nodes */
      if (i == 2) {
        repEnvInfo[i].getRepConfig().setNodeType(NodeType.SECONDARY);
      }

      /* Create a thread that attempts to join another environment */
      RepNodeThread repNodeThread = new RepNodeThread(i, i != 1);
      repNodeThread.start();

      /* Wait for attempt to complete */
      repNodeThread.join(30000);
      /* join(30000) returns on timeout too, so check the thread really finished. */
      assertEquals("RN thread alive", false, repNodeThread.isAlive());

      if (i == 1) {

        /* Join attempt should fail for primary */
        assertNotNull("Expected RN thread exception", repNodeThread.te);

        /* Release write lock on RepGroupDB */
        cursor.close();
        txn.abort();

        /* Second join attempt should succeed */
        repNodeThread = new RepNodeThread(1);
        repNodeThread.start();
        repNodeThread.join(30000);
        assertEquals("RN thread alive", false, repNodeThread.isAlive());
        assertEquals("RN thread exception", null, repNodeThread.te);
      } else {

        /* Join attempt should succeed for secondary */
        assertEquals("RN thread exception", null, repNodeThread.te);

        /* Release write lock on RepGroupDB */
        cursor.close();
        txn.abort();
      }
    }
  }