/** * Syncup the group and check for these requirements: - the master has all the data we expect - * the replicas have all the data that is on the master. * * <p>- the last VLSN is not a sync VLSN. We want to ensure that the matchpoint is not the last * VLSN, so the test will need to do rollback * * @throws InterruptedException * @return lastVLSN on the master */ private VLSN checkIfWholeGroupInSync( ReplicatedEnvironment master, RepEnvInfo[] repEnvInfo, RollbackWorkload workload) throws InterruptedException { /* * Make sure we're testing partial rollbacks, and that the replication * stream is poised at a place where the last sync VLSN != lastVLSN. */ VLSN lastVLSN = ensureDistinctLastAndSyncVLSN(master, repEnvInfo); RepTestUtils.syncGroupToVLSN(repEnvInfo, repEnvInfo.length, lastVLSN); /* * All nodes in the group should have the same data, and it should * consist of committed and uncommitted updates. */ assertTrue(workload.containsAllData(master)); /* * TODO: Node equality check is temporarily disabled because it (or * perhaps just the passing of time that allows for a heartbeat) causes * a GroupDB record to be written, which becomes the matchpoint and * defeats the test of partial rollback (because there is none). */ // RepTestUtils.checkNodeEquality(lastVLSN, verbose, repEnvInfo); /* * TODO: The following fails if checkNodeEquality is called. Perhaps * we should just do this here and not above at the top of the method. */ lastVLSN = ensureDistinctLastAndSyncVLSN(master, repEnvInfo); return lastVLSN; }
/**
 * Waits until replication has caught up with the master's current last VLSN.
 *
 * <p>The target VLSN is read from the master's VLSN index range at call time.
 * The literal {@code 2} is handed to {@code RepTestUtils.syncGroupToVLSN} as
 * its second argument; presumably this is the number of nodes expected to be
 * up (master plus one replica) -- confirm against RepTestUtils.
 *
 * @param master the current master environment
 * @param repEnvInfo all nodes of the replication group
 * @throws InterruptedException propagated from the sync helper
 */
private void waitForReplicaToSync(ReplicatedEnvironment master, RepEnvInfo[] repEnvInfo)
    throws InterruptedException {

    final VLSN masterLastVLSN =
        RepInternal.getRepImpl(master)
            .getVLSNIndex()
            .getRange()
            .getLast();

    RepTestUtils.syncGroupToVLSN(repEnvInfo, 2, masterLastVLSN);
}
/**
 * Alternately crashes and revives the two replicas of a 3 node group while the
 * master keeps doing steady work, then verifies the group's contents.
 *
 * <p>The master never dies in this test, so no rollbacks should occur, but no
 * data should be lost either.
 *
 * <p>TODO: Should the workload param be of a different class (not a
 * RollbackWorkload), since its masterSteadyWork method is only called here?
 *
 * @param workload supplies the master-side work and the final data check
 * @param numIterations number of crash/revive rounds; asserted to be even
 * @throws Throwable any failure, after its stack trace has been printed
 */
private void replicasDieAndRejoin(RollbackWorkload workload, int numIterations)
    throws Throwable {

    RepEnvInfo[] repEnvInfo = null;
    try {
        /* Build a 3 node group and let the nodes take on their roles. */
        repEnvInfo = RepTestUtils.setupEnvInfos(envRoot, 3);
        ReplicatedEnvironment master = RepTestUtils.joinGroup(repEnvInfo);
        logger.severe("master=" + master);

        /* The first non-master node found is replica A, a later one is B. */
        RepEnvInfo replicaA = null;
        RepEnvInfo replicaB = null;
        for (RepEnvInfo node : repEnvInfo) {
            if (!node.getEnv().getState().isMaster()) {
                if (replicaA == null) {
                    replicaA = node;
                } else {
                    replicaB = node;
                }
            }
        }

        /*
         * To keep the test simple, insist on an even number of rounds: the
         * loop below then always finishes with replica A down and B up.
         */
        final boolean evenIterationCount = (numIterations % 2) == 0;
        assertTrue(evenIterationCount);

        /* Start with replica A down; only B follows the master at first. */
        replicaA.abnormalCloseEnv();

        for (int round = 0; round < numIterations; round++) {
            workload.masterSteadyWork(master);
            waitForReplicaToSync(master, repEnvInfo);

            /*
             * Swap the replicas: even rounds take B down and bring A back,
             * odd rounds do the opposite.
             */
            final boolean evenRound = (round % 2) == 0;
            final RepEnvInfo toCrash = evenRound ? replicaB : replicaA;
            final RepEnvInfo toRevive = evenRound ? replicaA : replicaB;
            flushLogAndCrash(toCrash);
            toRevive.openEnv();

            waitForReplicaToSync(master, repEnvInfo);
        }

        /* Replica A is the one left down after the loop; bring it back. */
        replicaA.openEnv();

        /* With all nodes up, sync the full group and verify the contents. */
        final VLSN finalVLSN =
            RepInternal.getRepImpl(master).getVLSNIndex().getRange().getLast();
        RepTestUtils.syncGroupToVLSN(repEnvInfo, repEnvInfo.length, finalVLSN);

        final boolean masterHasAllData = workload.containsAllData(master);
        assertTrue(masterHasAllData);
        RepTestUtils.checkNodeEquality(finalVLSN, verbose, repEnvInfo);

        workload.close();

        /*
         * The test is over. Take the replicators down forcibly, without
         * closing transactions and whatnot.
         */
        for (RepEnvInfo node : repEnvInfo) {
            node.abnormalCloseEnv();
        }
    } catch (Throwable failure) {
        failure.printStackTrace();
        throw failure;
    }
}
/**
 * Sets up a 3 node group, replicates operations, kills the master and lets the
 * two remaining nodes carry on. When the old master comes back, a syncup is
 * required, along with a rollback of any operations after the matchpoint.
 *
 * <p>Afterwards the node that was never a master is crashed and recovered,
 * and finally the whole group is forcibly closed, restarted and verified once
 * more.
 *
 * @param workload drives the work done in each phase and the data checks
 * @throws Throwable any failure, after its stack trace has been printed
 */
private void masterDiesAndRejoins(RollbackWorkload workload) throws Throwable {

    RepEnvInfo[] repEnvInfo = null;
    try {
        /* Build the 3 node group. */
        repEnvInfo = RepTestUtils.setupEnvInfos(envRoot, 3);
        ReplicatedEnvironment master = RepTestUtils.joinGroup(repEnvInfo);
        logger.severe("master=" + master);

        /*
         * Phase 1: run a workload against the master, then sync up the
         * group and check the contents. This first workload must end with
         * in-progress, uncommitted transactions. The sync check is only
         * done when the workload reports no lock conflict; otherwise the
         * VLSN stays NULL_VLSN.
         */
        workload.beforeMasterCrash(master);
        final VLSN preCrashVLSN =
            workload.noLockConflict()
                ? checkIfWholeGroupInSync(master, repEnvInfo, workload)
                : VLSN.NULL_VLSN;

        /*
         * Phase 2: crash the master and find a new one. Nodes are looked up
         * in repEnvInfo by (node id - 1), i.e. this assumes node ids are
         * assigned 1..N in array order.
         */
        final int oldMasterIndex = RepInternal.getNodeId(master) - 1;
        final RepEnvInfo oldMaster = repEnvInfo[oldMasterIndex];
        master = crashMasterAndElectNewMaster(master, repEnvInfo);
        final int newMasterIndex = RepInternal.getNodeId(master) - 1;
        final RepEnvInfo newMaster = repEnvInfo[newMasterIndex];
        logger.severe("newmaster=" + master);

        /* The node that is neither old nor new master stays a replica. */
        RepEnvInfo alwaysReplica = null;
        for (RepEnvInfo node : repEnvInfo) {
            if (node == oldMaster || node == newMaster) {
                continue;
            }
            alwaysReplica = node;
            break;
        }

        /*
         * The two remaining nodes should only contain committed updates.
         * TODO: check that the number of group members is 2.
         */
        assertTrue(workload.containsSavedData(master));
        RepTestUtils.checkNodeEquality(preCrashVLSN, verbose, repEnvInfo);

        /*
         * Phase 3: work against the new master while the old master is
         * asleep. The first workload may have contained in-flight
         * transactions, so this may result in the rollback of some
         * transactions from the first workload.
         */
        workload.afterMasterCrashBeforeResumption(master);

        /*
         * The work done after the crash is intended to end on an
         * incomplete txn. Check for that.
         */
        final VLSN postCrashVLSN = ensureDistinctLastAndSyncVLSN(master, repEnvInfo);

        /* Phase 4: bring the old master back and sync the full group. */
        logger.info("Bring up old master");
        oldMaster.openEnv();
        logger.info("Old master joined");
        RepTestUtils.syncGroupToVLSN(repEnvInfo, repEnvInfo.length, postCrashVLSN);
        logger.info("Old master synced");

        /* All nodes should only contain committed updates. */
        workload.releaseDbLocks();
        assertTrue(workload.containsSavedData(master));
        RepTestUtils.checkNodeEquality(postCrashVLSN, verbose, repEnvInfo);

        /*
         * Phase 5: crash the node that has never been a master, do some
         * work without it, recover it, then verify. This exercises the
         * recovery of a log that has syncups in it.
         */
        alwaysReplica.abnormalCloseEnv();
        workload.afterReplicaCrash(master);
        final VLSN afterReplicaCrashVLSN =
            RepInternal.getRepImpl(master).getVLSNIndex().getRange().getLast();
        RepTestUtils.syncGroupToVLSN(repEnvInfo, 2, afterReplicaCrashVLSN);
        alwaysReplica.openEnv();
        RepTestUtils.syncGroupToVLSN(repEnvInfo, 3, afterReplicaCrashVLSN);
        assertTrue(workload.containsSavedData(master));
        RepTestUtils.checkNodeEquality(afterReplicaCrashVLSN, verbose, repEnvInfo);
        RepTestUtils.checkUtilizationProfile(repEnvInfo);

        workload.close();

        /*
         * The main test is over. Take the replicators down forcibly,
         * without closing transactions and whatnot.
         */
        for (RepEnvInfo node : repEnvInfo) {
            node.abnormalCloseEnv();
        }

        /*
         * Phase 6: open and verify the environments one last time, to
         * ensure that rollbacks in the recovery interval don't cause
         * problems.
         */
        master = RepTestUtils.restartGroup(repEnvInfo);
        final VLSN restartVLSN =
            RepInternal.getRepImpl(master).getVLSNIndex().getRange().getLast();
        RepTestUtils.syncGroupToVLSN(repEnvInfo, 3, restartVLSN);
        RepTestUtils.checkNodeEquality(restartVLSN, verbose, repEnvInfo);

        /* Final, orderly close. */
        for (RepEnvInfo node : repEnvInfo) {
            node.closeEnv();
        }
    } catch (Throwable failure) {
        failure.printStackTrace();
        throw failure;
    }
}