/*
 * Test the API: RepImpl.setBackupProhibited would disable the DbBackup in
 * DbBackup.startBackup, may be caused by Replay.rollback().
 *
 * While backups are prohibited, startBackup must throw
 * LogOverwriteException; once the prohibition is lifted, a complete
 * startBackup/endBackup cycle must succeed.
 *
 * The single-node group is shut down in an outer finally block so that the
 * environments are released no matter which step fails.
 */
@Test
public void testRollingBackDbBackupAPI() throws Throwable {
    RepEnvInfo[] repEnvInfo = RepTestUtils.setupEnvInfos(envRoot, 1);
    ReplicatedEnvironment master = RepTestUtils.joinGroup(repEnvInfo);
    try {
        RepImpl repImpl = RepInternal.getRepImpl(master);
        DbBackup backupHelper = new DbBackup(master);

        /* With backups prohibited, starting a backup has to be rejected. */
        repImpl.setBackupProhibited(true);
        try {
            backupHelper.startBackup();
            fail("Should throw out a LogOverwriteException here.");
        } catch (LogOverwriteException e) {
            /* Expect a LogOverwriteException here. */
        }

        /* With the prohibition lifted, the backup must run cleanly. */
        repImpl.setBackupProhibited(false);
        try {
            backupHelper.startBackup();
            backupHelper.endBackup();
        } catch (Exception e) {
            /* Report the exception so the failure is diagnosable. */
            fail("Shouldn't get an exception here: " + e);
        }
    } finally {
        RepTestUtils.shutdownRepEnvs(repEnvInfo);
    }
}
/**
 * Asserts that the last item in the master's replicated stream is not a
 * matchpoint candidate, i.e. that VLSNRange.lastVLSN differs from
 * VLSNRange.lastSync. Equality would not be wrong in itself; the tests that
 * call this simply want to be sure they exercise partial rollbacks.
 *
 * @param master the node whose VLSN range is inspected
 * @param repEnvInfo the group's environments (not consulted by this check)
 * @return lastVLSN
 * @throws InterruptedException declared by the signature
 */
private VLSN ensureDistinctLastAndSyncVLSN(ReplicatedEnvironment master, RepEnvInfo[] repEnvInfo)
    throws InterruptedException {

    final VLSNRange currentRange = RepInternal.getRepImpl(master).getVLSNIndex().getRange();
    final VLSN last = currentRange.getLast();
    final VLSN lastSync = currentRange.getLastSync();

    final String description = "lastVLSN = " + last + " syncVLSN = " + lastSync;
    final boolean sameVLSN = last.equals(lastSync);
    assertFalse(description, sameVLSN);

    return last;
}
/*
 * Start the master (the helper node) first, then the remaining nodes in
 * array order. The first node opened must come up as MASTER and every later
 * one as REPLICA, and no node may have run an election along the way.
 */
@Test
public void testGroupCreateMasterFirst() throws DatabaseException {
    for (int nodeIdx = 0; nodeIdx < repEnvInfo.length; nodeIdx++) {
        final ReplicatedEnvironment env = repEnvInfo[nodeIdx].openEnv();
        final State actual = env.getState();

        final State expected;
        if (nodeIdx == 0) {
            expected = State.MASTER;
        } else {
            expected = State.REPLICA;
        }
        assertEquals(expected, actual);

        /* No elections, helper nodes or members queried for master. */
        final RepNode node = RepInternal.getRepImpl(env).getRepNode();
        assertEquals(0, node.getElections().getElectionCount());
    }
}
/**
 * Abnormally closes the current master's environment, then rejoins the group
 * and reports whichever node is master afterwards.
 *
 * @param master the node to crash; its node id minus one is used as its
 *     index into {@code repEnvInfo}
 * @param repEnvInfo all environments in the group
 * @return the environment returned by RepTestUtils.openRepEnvsJoin
 */
private ReplicatedEnvironment crashMasterAndElectNewMaster(
    ReplicatedEnvironment master, RepEnvInfo[] repEnvInfo) {

    final int crashedSlot = RepInternal.getNodeId(master) - 1;

    logger.info("Crashing " + master.getNodeName());
    final RepEnvInfo crashed = repEnvInfo[crashedSlot];
    crashed.abnormalCloseEnv();

    logger.info("Rejoining");
    final ReplicatedEnvironment elected = RepTestUtils.openRepEnvsJoin(repEnvInfo);

    logger.info("New master = " + elected.getNodeName());
    return elected;
}
/*
 * Tests internal node removal APIs.
 *
 * Every replica is removed from the group while its handle is still open.
 * After the handles are closed, reopening under the old node name must be
 * rejected with a HANDSHAKE_ERROR, while reopening under a fresh node name
 * (with the old log files removed) must grow the group again.
 */
@Test
public void testRemoveMember() {
    createGroup(groupSize);

    final ReplicatedEnvironment master = repEnvInfo[0].getEnv();
    assertTrue(master.getState().isMaster());
    final RepNode masterNode = repEnvInfo[0].getRepNode();

    /* Reduce the group size all the way down to one. */
    for (int idx = 1; idx < groupSize; idx++) {
        final RepEnvInfo replica = repEnvInfo[idx];
        final boolean closed = RepInternal.isClosed(replica.getEnv());
        assertTrue(!closed);

        final String removedName = replica.getEnv().getNodeName();
        masterNode.removeMember(removedName);

        final int expectedSize = groupSize - idx;
        assertEquals(expectedSize, masterNode.getGroup().getElectableGroupSize());
    }

    /* Close the replica handles, highest index first. */
    for (int idx = groupSize - 1; idx > 0; idx--) {
        final RepEnvInfo replica = repEnvInfo[idx];
        replica.closeEnv();
    }

    /* Attempting to re-open them with the same node names should fail. */
    for (int idx = 1; idx < groupSize; idx++) {
        final RepEnvInfo replica = repEnvInfo[idx];
        try {
            replica.openEnv();
            fail("Exception expected");
        } catch (EnvironmentFailureException rejected) {
            /* Expected, the master should reject the attempt. */
            assertEquals(EnvironmentFailureReason.HANDSHAKE_ERROR, rejected.getReason());
        }
    }

    /* Doing the same but with different node names should be ok. */
    for (int idx = 1; idx < groupSize; idx++) {
        final RepEnvInfo replacement = repEnvInfo[idx];
        final ReplicationConfig config = replacement.getRepConfig();

        TestUtils.removeLogFiles("RemoveRepEnvironments", replacement.getEnvHome(), false);

        final String freshName = "ReplaceNode_" + idx;
        config.setNodeName(freshName);
        replacement.openEnv();

        assertEquals(idx + 1, masterNode.getGroup().getElectableGroupSize());
    }

    master.close();
}
/*
 * Test the API: RepImpl.invalidateBackups would disable the DbBackup
 * at endBackup, may be caused by Replay.rollback().
 *
 * A backup is started, then a test hook is installed whose doHook() calls
 * repImpl.invalidateBackups(8L). The following endBackup call is expected
 * to throw LogOverwriteException.
 * NOTE(review): presumably DbBackup fires the hook during endBackup and 8L
 * is just an arbitrary argument -- confirm against DbBackup.
 *
 * The single-node group is shut down in an outer finally block so that the
 * environments are released even if starting the backup or installing the
 * hook fails.
 */
@Test
public void testRollBackInvalidateDbBackup() throws Exception {
    RepEnvInfo[] repEnvInfo = RepTestUtils.setupEnvInfos(envRoot, 1);
    ReplicatedEnvironment master = RepTestUtils.joinGroup(repEnvInfo);
    try {
        final RepImpl repImpl = RepInternal.getRepImpl(master);
        DbBackup backupHelper = new DbBackup(master);
        backupHelper.startBackup();

        /* Only the no-argument doHook() is meant to be invoked. */
        backupHelper.setTestHook(
            new TestHook<Object>() {
                public void doHook() {
                    repImpl.invalidateBackups(8L);
                }

                public Object getHookValue() {
                    throw new UnsupportedOperationException();
                }

                public void doIOHook() {
                    throw new UnsupportedOperationException();
                }

                public void hookSetup() {
                    throw new UnsupportedOperationException();
                }

                public void doHook(Object obj) {
                    throw new UnsupportedOperationException();
                }
            });

        try {
            backupHelper.endBackup();
            fail("Should throw out a LogOverwriteException here.");
        } catch (LogOverwriteException e) {
            /* Expect to get a LogOverwriteException here. */
        }
    } finally {
        RepTestUtils.shutdownRepEnvs(repEnvInfo);
    }
}
/*
 * Tests internal node deletion APIs.
 *
 * Each replica is closed first and then removed from the group through
 * removeMember(name, true). Afterwards, reopening the replicas under their
 * original node names is expected to succeed.
 */
@Test
public void testDeleteMember() {
    createGroup(groupSize);

    final ReplicatedEnvironment master = repEnvInfo[0].getEnv();
    assertTrue(master.getState().isMaster());
    final RepNode masterNode = repEnvInfo[0].getRepNode();

    /* Reduce the group size all the way down to one. */
    for (int idx = 1; idx < groupSize; idx++) {
        final boolean closed = RepInternal.isClosed(repEnvInfo[idx].getEnv());
        assertTrue(!closed);

        /* Capture the name before the handle is closed. */
        final String deletedName = repEnvInfo[idx].getEnv().getNodeName();
        repEnvInfo[idx].closeEnv();
        masterNode.removeMember(deletedName, true);

        final int expectedSize = groupSize - idx;
        assertEquals(expectedSize, masterNode.getGroup().getElectableGroupSize());
    }

    /*
     * Attempting to re-open them with the same node names should succeed
     */
    for (int idx = 1; idx < groupSize; idx++) {
        repEnvInfo[idx].openEnv();
    }
}
/*
 * Just check if the replica is in sync: read the last VLSN from the
 * master's VLSN index and ask RepTestUtils.syncGroupToVLSN to sync the
 * group to it, passing a node count of 2.
 */
private void waitForReplicaToSync(ReplicatedEnvironment master, RepEnvInfo[] repEnvInfo)
    throws InterruptedException {

    final int nodeCount = 2;
    final VLSN masterLast =
        RepInternal.getRepImpl(master).getVLSNIndex().getRange().getLast();

    RepTestUtils.syncGroupToVLSN(repEnvInfo, nodeCount, masterLast);
}
/**
 * Runs steady work on the master of a 3 node group while the two replicas
 * take turns crashing and rejoining. Since the master never dies in this
 * test, no rollbacks should occur, but no data should be lost either.
 *
 * <p>TODO: Should the workload param be of a different class (not a
 * RollbackWorkload), since its masterSteadyWork method is only called here?
 *
 * @param workload supplies the master-side work and the final data check
 * @param numIterations number of crash/rejoin rounds; must be even
 */
private void replicasDieAndRejoin(RollbackWorkload workload, int numIterations)
    throws Throwable {

    try {
        /* Create a 3 node group. Assign identities. */
        final RepEnvInfo[] group = RepTestUtils.setupEnvInfos(envRoot, 3);
        final ReplicatedEnvironment master = RepTestUtils.joinGroup(group);
        logger.severe("master=" + master);

        /* The first non-master found is replica A, the other is replica B. */
        RepEnvInfo replicaA = null;
        RepEnvInfo replicaB = null;
        for (RepEnvInfo node : group) {
            if (!node.getEnv().getState().isMaster()) {
                if (replicaA == null) {
                    replicaA = node;
                } else {
                    replicaB = node;
                }
            }
        }

        /*
         * For the sake of easy test writing, make sure numIterations is an
         * even number.
         */
        assertTrue((numIterations % 2) == 0);

        /* Start with A down; each round swaps which replica is down. */
        replicaA.abnormalCloseEnv();
        for (int round = 0; round < numIterations; round++) {
            workload.masterSteadyWork(master);
            waitForReplicaToSync(master, group);

            final boolean evenRound = (round % 2) == 0;
            final RepEnvInfo toCrash = evenRound ? replicaB : replicaA;
            final RepEnvInfo toReopen = evenRound ? replicaA : replicaB;
            flushLogAndCrash(toCrash);
            toReopen.openEnv();

            waitForReplicaToSync(master, group);
        }

        /* Bring A back, then verify the whole group. */
        replicaA.openEnv();

        final VLSN finalVLSN =
            RepInternal.getRepImpl(master).getVLSNIndex().getRange().getLast();
        RepTestUtils.syncGroupToVLSN(group, group.length, finalVLSN);

        assertTrue(workload.containsAllData(master));
        RepTestUtils.checkNodeEquality(finalVLSN, verbose, group);

        workload.close();

        /*
         * We're done with the test. Bringing down these replicators
         * forcibly, without closing transactions and whatnot.
         */
        for (RepEnvInfo node : group) {
            node.abnormalCloseEnv();
        }
    } catch (Throwable failure) {
        failure.printStackTrace();
        throw failure;
    }
}
/**
 * Create 3 nodes and replicate operations. Kill off the master, and make the
 * other two resume. This will require a syncup and a rollback of any
 * operations after the matchpoint.
 *
 * <p>The test runs in phases: work before the master crash, crash and
 * election of a new master, more work while the old master is down, rejoin
 * of the old master, crash and recovery of the node that was never a master,
 * and finally a forced shutdown followed by a full group restart. After each
 * phase the group is synced and checked for equal contents.
 *
 * @param workload supplies the work done in each phase and the data checks
 * @throws Throwable any failure, after its stack trace has been printed
 */
private void masterDiesAndRejoins(RollbackWorkload workload) throws Throwable {
    RepEnvInfo[] repEnvInfo = null;
    try {
        /* Create a 3 node group */
        repEnvInfo = RepTestUtils.setupEnvInfos(envRoot, 3);
        ReplicatedEnvironment master = RepTestUtils.joinGroup(repEnvInfo);
        logger.severe("master=" + master);

        /*
         * Run a workload against the master. Sync up the group and check
         * that all nodes have the same contents. This first workload must
         * end with in-progress, uncommitted transactions.
         *
         * The group-wide sync check is only made when the workload reports
         * no lock conflict; otherwise lastVLSN stays at NULL_VLSN.
         */
        workload.beforeMasterCrash(master);
        VLSN lastVLSN = VLSN.NULL_VLSN;
        if (workload.noLockConflict()) {
            lastVLSN = checkIfWholeGroupInSync(master, repEnvInfo, workload);
        }

        /*
         * Crash the master, find a new master. A node's id minus one is
         * used as its index into repEnvInfo.
         */
        RepEnvInfo oldMaster = repEnvInfo[RepInternal.getNodeId(master) - 1];
        master = crashMasterAndElectNewMaster(master, repEnvInfo);
        RepEnvInfo newMaster = repEnvInfo[RepInternal.getNodeId(master) - 1];
        logger.severe("newmaster=" + master);

        /* The node that is neither the old nor the new master. */
        RepEnvInfo alwaysReplica = null;
        for (RepEnvInfo info : repEnvInfo) {
            if ((info != oldMaster) && (info != newMaster)) {
                alwaysReplica = info;
                break;
            }
        }

        /*
         * Check that the remaining two nodes only contain committed
         * updates.
         * TODO: check that the number of group members is 2.
         */
        assertTrue(workload.containsSavedData(master));
        RepTestUtils.checkNodeEquality(lastVLSN, verbose, repEnvInfo);

        /*
         * Do some work against the new master, while the old master is
         * asleep. Note that the first workload may have contained
         * in-flight transactions, so this may result in the rollback of
         * some transactions in the first workload.
         */
        workload.afterMasterCrashBeforeResumption(master);

        /*
         * The intent of this test is that the work after crash will end on
         * an incomplete txn. Check for that.
         */
        lastVLSN = ensureDistinctLastAndSyncVLSN(master, repEnvInfo);

        /* Now bring up the old master. */
        logger.info("Bring up old master");
        oldMaster.openEnv();
        logger.info("Old master joined");
        RepTestUtils.syncGroupToVLSN(repEnvInfo, repEnvInfo.length, lastVLSN);
        logger.info("Old master synced");

        /*
         * Check that all nodes only contain committed updates. The
         * workload's database locks are released before the check.
         */
        workload.releaseDbLocks();
        assertTrue(workload.containsSavedData(master));
        RepTestUtils.checkNodeEquality(lastVLSN, verbose, repEnvInfo);

        /*
         * Now crash the node that has never been a master. Do some work
         * without it, then recover that node, then do a verification
         * check. This exercises the recovery of a log that has syncups in
         * it.
         *
         * The group is synced with a node count of 2 while that node is
         * down, and with a node count of 3 once it has been reopened.
         */
        alwaysReplica.abnormalCloseEnv();
        workload.afterReplicaCrash(master);
        lastVLSN = RepInternal.getRepImpl(master).getVLSNIndex().getRange().getLast();
        RepTestUtils.syncGroupToVLSN(repEnvInfo, 2, lastVLSN);
        alwaysReplica.openEnv();
        RepTestUtils.syncGroupToVLSN(repEnvInfo, 3, lastVLSN);
        assertTrue(workload.containsSavedData(master));
        RepTestUtils.checkNodeEquality(lastVLSN, verbose, repEnvInfo);
        RepTestUtils.checkUtilizationProfile(repEnvInfo);
        workload.close();

        /*
         * We're done with the test. Bringing down these replicators
         * forcibly, without closing transactions and whatnot.
         */
        for (RepEnvInfo repi : repEnvInfo) {
            repi.abnormalCloseEnv();
        }

        /*
         * Open and verify the environments one last time, to ensure that
         * rollbacks in the recovery interval don't cause problems.
         */
        master = RepTestUtils.restartGroup(repEnvInfo);
        lastVLSN = RepInternal.getRepImpl(master).getVLSNIndex().getRange().getLast();
        RepTestUtils.syncGroupToVLSN(repEnvInfo, 3, lastVLSN);
        RepTestUtils.checkNodeEquality(lastVLSN, verbose, repEnvInfo);

        /* Final close. */
        for (RepEnvInfo repi : repEnvInfo) {
            repi.closeEnv();
        }
    } catch (Throwable e) {
        /* Print the trace for diagnosis, then let the failure propagate. */
        e.printStackTrace();
        throw e;
    }
}
/**
 * Test that a timeout in the feeder while attempting to read the group
 * database because other feeders have it write locked causes the feeder (and
 * replica) to fail, but allows the master to continue operating. [#23822]
 *
 * <p>The loop runs twice: iteration 1 joins node 1 with its configured node
 * type, iteration 2 joins node 2 after switching it to SECONDARY. In both
 * iterations the test itself holds a write lock on the group database record
 * while the join is attempted.
 */
@Test
public void testJoinGroupReadGroupTimeout() throws DatabaseException, InterruptedException {
    /* Start first node as master */
    ReplicatedEnvironment repEnv = repEnvInfo[0].openEnv();
    assertEquals("Master node state", State.MASTER, repEnv.getState());
    RepImpl repImpl = RepInternal.getRepImpl(repEnv);
    for (int i = 1; i <= 2; i++) {
        /*
         * Get a write lock on the RepGroupDB: open a master transaction,
         * then read the group key record with LockMode.RMW through a cursor
         * on the group database.
         */
        final MasterTxn txn =
            new MasterTxn(
                repImpl,
                new TransactionConfig()
                    .setDurability(
                        new Durability(
                            SyncPolicy.SYNC, SyncPolicy.SYNC, ReplicaAckPolicy.SIMPLE_MAJORITY)),
                repImpl.getNameIdPair());
        final DatabaseImpl groupDbImpl = repImpl.getGroupDb();
        final DatabaseEntry value = new DatabaseEntry();
        final Cursor cursor = DbInternal.makeCursor(groupDbImpl, txn, new CursorConfig());
        final OperationStatus status = cursor.getNext(RepGroupDB.groupKeyEntry, value, LockMode.RMW);
        assertEquals(i + ": Lock group result", OperationStatus.SUCCESS, status);
        /* Wait longer than the default 500 ms read timeout */
        Thread.sleep(600);
        /* Test both electable and secondary nodes */
        if (i == 2) {
            repEnvInfo[i].getRepConfig().setNodeType(NodeType.SECONDARY);
        }
        /*
         * Create a thread that attempts to join another environment.
         * NOTE(review): the second constructor argument is false for the
         * first node and true for the second; its meaning is defined by
         * RepNodeThread, which is not visible here -- confirm.
         */
        RepNodeThread repNodeThread = new RepNodeThread(i, i != 1);
        repNodeThread.start();
        /* Wait for attempt to complete; join is given a 30000 limit */
        repNodeThread.join(30000);
        assertEquals("RN thread alive", false, repNodeThread.isAlive());
        if (i == 1) {
            /* Join attempt should fail for primary (the first joiner) */
            assertNotNull("Expected RN thread exception", repNodeThread.te);
            /* Release write lock on RepGroupDB */
            cursor.close();
            txn.abort();
            /* Second join attempt should succeed, now that the lock is gone */
            repNodeThread = new RepNodeThread(1);
            repNodeThread.start();
            repNodeThread.join(30000);
            assertEquals("RN thread alive", false, repNodeThread.isAlive());
            assertEquals("RN thread exception", null, repNodeThread.te);
        } else {
            /*
             * Join attempt should succeed for secondary, even though the
             * write lock is still held at this point.
             */
            assertEquals("RN thread exception", null, repNodeThread.te);
            /* Release write lock on RepGroupDB */
            cursor.close();
            txn.abort();
        }
    }
}