/**
 * Reproduces the failure from [#20919]: a writer allocates a VLSN but fails
 * before that VLSN is tracked by the VLSN index.
 *
 * <p>The original sequence of events was:
 *
 * <ol>
 *   <li>close was called on the replicated environment;
 *   <li>the repNode was nulled out;
 *   <li>a concurrent writing thread got an NPE inside its call to
 *       LogManager.log because the repNode was null, and exited after it had
 *       bumped the VLSN but before entering it in the vlsnIndex;
 *   <li>the environment close attempted a checkpoint, and the checkpoint
 *       hung.
 * </ol>
 *
 * <p>The fix makes step 3 invalidate the environment and makes step 4 check
 * for an invalidated environment. This test stands in for step 3 by
 * installing a hook that forces an exception during logging.
 */
@Test
public void testLoggingFailure() throws DatabaseException, IOException {

    /* Build a group consisting of one replicated environment. */
    RepEnvInfo[] singleNodeGroup = RepTestUtils.setupEnvInfos(envRoot, 1);
    RepTestUtils.joinGroup(singleNodeGroup);

    /*
     * Turn the cleaner off so that VLSN creation is driven only by this
     * test.
     *
     * NOTE(review): the intent stated for this section is to disable CBVLSN
     * updating as well, yet the suppress flag is set to false -- confirm the
     * polarity of setSuppressGroupDBUpdates.
     */
    Environment environment = singleNodeGroup[0].getEnv();
    EnvironmentMutableConfig mutableConfig = environment.getMutableConfig();
    mutableConfig.setConfigParam("je.env.runCleaner", "false");
    environment.setMutableConfig(mutableConfig);
    LocalCBVLSNUpdater.setSuppressGroupDBUpdates(false);

    /* A transactional database, created on demand, to write into. */
    DatabaseConfig txnDbConfig = new DatabaseConfig();
    txnDbConfig.setTransactional(true);
    txnDbConfig.setAllowCreate(true);
    Database database = environment.openDatabase(null, "foo", txnDbConfig);

    /* The same four-byte entry serves as both key and data. */
    DatabaseEntry entry = new DatabaseEntry(new byte[4]);

    EnvironmentImpl envImpl = DbInternal.getEnvironmentImpl(environment);
    LogManager logManager =
        DbInternal.getEnvironmentImpl(environment).getLogManager();

    /*
     * Arrange for the next call to log() made for a replicated log entry
     * to throw.
     */
    logManager.setDelayVLSNRegisterHook(new ForceException());
    VLSNIndex index = ((RepImpl) envImpl).getVLSNIndex();

    try {
        database.put(null, entry, entry);
        fail("Should throw exception");
    } catch (Exception expected) {
        /*
         * The write failed as planned. The latest allocated VLSN value
         * must now be greater than the last sequence in the index's range.
         */
        final String detail =
            "latest="
                + index.getLatestAllocatedVal()
                + " last mapped="
                + index.getRange().getLast().getSequence();
        final boolean allocatedBeyondMapped =
            index.getLatestAllocatedVal()
                > index.getRange().getLast().getSequence();
        assertTrue(detail, allocatedBeyondMapped);
    }

    try {
        /*
         * NOTE(review): this static is overwritten here and is not restored
         * within this method -- confirm that other tests do not depend on
         * its previous value.
         */
        VLSNIndex.AWAIT_CONSISTENCY_MS = 1000;
        envImpl.awaitVLSNConsistency();
        fail("Should throw and break out");
    } catch (DatabaseException expected) {
        /* Expected: the wait must give up with an exception. */
    }

    /* Before the fix, this test hung. */
}
/**
 * Several network-restore scenarios rolled into one test. The replica is
 * restored while in each of these three states:
 *
 * <p>1) A brand new node joining the group and needing a network restore.
 *
 * <p>2) An existing node with its own unique log needing a network restore.
 *
 * <p>3) Repeated network restores, reflecting a mature node.
 *
 * <p>Each pass of the loop expects opening the replica to fail with an
 * {@code InsufficientLogException}, runs a {@code NetworkRestore} from that
 * exception, reopens a replacement replica, and then checks the group after
 * writing more data on the master.
 */
@Test
public void testBasic() throws DatabaseException, Exception {

    /*
     * InsufficientLogExceptions can reach the cleaner thread; keep that
     * chatter off stderr.
     */
    DaemonThread.stifleExceptionChatter = true;

    configureForMaxCleaning(2);

    final RepEnvInfo info1 = repEnvInfo[0];
    RepEnvInfo info2 = repEnvInfo[1];

    ReplicatedEnvironment master = info1.openEnv();

    /* Cleaner off while the initial data is written. */
    EnvironmentMutableConfig cleanerConfig = master.getMutableConfig();
    cleanerConfig.setConfigParam(
        EnvironmentParams.ENV_RUN_CLEANER.getName(), "false");
    master.setMutableConfig(cleanerConfig);

    /*
     * Only the master joins at first. That covers the special case of a
     * brand new node joining a group and needing VLSN 1. The same node then
     * rejoins with its VLSN > 1, covering later rejoins by a node that has
     * already taken part in replication.
     */
    populateDB(master, TEST_DB_NAME, 100);

    /* Cleaner back on for the restore cycles. */
    cleanerConfig = master.getMutableConfig();
    cleanerConfig.setConfigParam(
        EnvironmentParams.ENV_RUN_CLEANER.getName(), "true");
    master.setMutableConfig(cleanerConfig);

    File replicaHome = info2.getEnvHome();
    final int replicaId = 2;

    for (int cycle = 0; cycle < RESTORE_CYCLES; cycle++) {
        leaveGroupAllButMaster();
        shiftVLSNRight(master);

        RepNodeImpl memberBefore =
            info1.getRepNode()
                .getGroup()
                .getMember(info2.getRepConfig().getNodeName());

        /*
         * The first pass has no earlier CBVLSN to compare against, so the
         * member looked up above is not dereferenced then.
         */
        final VLSN syncBefore;
        if (cycle == 0) {
            syncBefore = null;
        } else {
            syncBefore = memberBefore.getBarrierState().getLastCBVLSN();
        }

        try {
            /* Opening the replica should force a network restore. */
            setExceptionListener(info2);
            info2.openEnv();
            fail("exception expected");
        } catch (InsufficientLogException restoreNeeded) {
            RepNodeImpl memberAfter =
                info1.getRepNode()
                    .getGroup()
                    .getMember(info2.getRepConfig().getNodeName());

            /*
             * The sync state should have moved forward so that it
             * contributes to the global CBVLSN and keeps it from advancing.
             */
            final VLSN syncAfter =
                memberAfter.getBarrierState().getLastCBVLSN();
            if (cycle != 0) {
                assertTrue(syncAfter.compareTo(syncBefore) >= 0);
            }

            NetworkRestore restore = new NetworkRestore();
            NetworkRestoreConfig restoreConfig = new NetworkRestoreConfig();
            restore.execute(restoreNeeded, restoreConfig);

            /* The restore must report that bytes were expected and moved. */
            final NetworkBackupStats backupStats =
                restore.getNetworkBackupStats();
            assertThat(backupStats.getExpectedBytes(), greaterThan(0));
            assertThat(backupStats.getTransferredBytes(), greaterThan(0));

            /*
             * Create a replacement replicator on the same directory.
             *
             * NOTE(review): the replacement is held only in the local
             * variable; repEnvInfo, which is handed to the verification
             * helpers below, is not updated -- confirm this is intended.
             */
            info2 =
                RepTestUtils.setupEnvInfo(
                    replicaHome,
                    RepTestUtils.DEFAULT_DURABILITY,
                    replicaId,
                    info1);
            setExceptionListener(info2);
            info2.openEnv();
        }

        /* Verify that we can continue with the "restored" log files. */
        populateDB(master, TEST_DB_NAME, 100, 100);
        VLSN lastCommit = RepTestUtils.syncGroupToLastCommit(repEnvInfo, 2);
        RepTestUtils.checkNodeEquality(lastCommit, false, repEnvInfo);
        info2.closeEnv();
    }
}