// stops the core, removes the transaction logs, restarts the core. void deleteLogs() throws Exception { String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir(); h.close(); try { String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir)); for (String file : files) { // new File(logDir, file).delete(); fs.delete(new Path(logDir, file), false); } assertEquals(0, HdfsUpdateLog.getLogList(fs, new Path(logDir)).length); } finally { // make sure we create the core again, even if the assert fails so it won't mess // up the next test. createCore(); assertJQ(req("q", "*:*"), "/response/numFound=="); // ensure it works } }
@Test
public void testReplicationFactor() throws Exception {
  // Verify that tlog files written to HDFS pick up the configured replication factor of 2.
  clearIndex();
  HdfsUpdateLog ulog = (HdfsUpdateLog) h.getCore().getUpdateHandler().getUpdateLog();
  assertU(commit());
  addAndGetVersion(sdoc("id", "REP1"), null);
  assertU(commit());

  boolean sawReplicationOfTwo = false;
  for (String tlogName : ulog.getLogList(new Path(ulog.getLogDir()))) {
    FileStatus status = fs.getFileStatus(new Path(ulog.getLogDir(), tlogName));
    if (status.getReplication() == 2) {
      sawReplicationOfTwo = true;
      break;
    }
  }
  assertTrue("Expected to find tlogs with a replication factor of 2", sawReplicationOfTwo);
}
@Test public void testTruncatedLog() throws Exception { try { DirectUpdateHandler2.commitOnClose = false; final Semaphore logReplay = new Semaphore(0); final Semaphore logReplayFinish = new Semaphore(0); UpdateLog.testing_logReplayHook = new Runnable() { @Override public void run() { try { assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS)); } catch (Exception e) { throw new RuntimeException(e); } } }; UpdateLog.testing_logReplayFinishHook = new Runnable() { @Override public void run() { logReplayFinish.release(); } }; String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir(); clearIndex(); assertU(commit()); assertU(adoc("id", "F1")); assertU(adoc("id", "F2")); assertU(adoc("id", "F3")); h.close(); String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir)); Arrays.sort(files); FSDataOutputStream dos = fs.append(new Path(logDir, files[files.length - 1])); dos.writeLong(0xffffffffffffffffL); dos.writeChars( "This should be appended to a good log file, representing a bad partially written record."); dos.close(); logReplay.release(1000); logReplayFinish.drainPermits(); ignoreException( "OutOfBoundsException"); // this is what the corrupted log currently produces... subject // to change. 
createCore(); assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS)); resetExceptionIgnores(); assertJQ(req("q", "*:*"), "/response/numFound==3"); // // Now test that the bad log file doesn't mess up retrieving latest versions // updateJ( jsonAdd(sdoc("id", "F4", "_version_", "104")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "F5", "_version_", "105")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "F6", "_version_", "106")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); // This currently skips the bad log file and also returns the version of the clearIndex (del // *:*) // assertJQ(req("qt","/get", "getVersions","6"), "/versions==[106,105,104]"); assertJQ(req("qt", "/get", "getVersions", "3"), "/versions==[106,105,104]"); } finally { DirectUpdateHandler2.commitOnClose = true; UpdateLog.testing_logReplayHook = null; UpdateLog.testing_logReplayFinishHook = null; } }
@Test public void testRemoveOldLogs() throws Exception { try { DirectUpdateHandler2.commitOnClose = false; final Semaphore logReplay = new Semaphore(0); final Semaphore logReplayFinish = new Semaphore(0); UpdateLog.testing_logReplayHook = new Runnable() { @Override public void run() { try { assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS)); } catch (Exception e) { throw new RuntimeException(e); } } }; UpdateLog.testing_logReplayFinishHook = new Runnable() { @Override public void run() { logReplayFinish.release(); } }; clearIndex(); assertU(commit()); String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir(); h.close(); String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir)); for (String file : files) { fs.delete(new Path(logDir, file), false); } assertEquals(0, HdfsUpdateLog.getLogList(fs, new Path(logDir)).length); createCore(); int start = 0; int maxReq = 50; LinkedList<Long> versions = new LinkedList<>(); addDocs(10, start, versions); start += 10; assertJQ( req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, start))); assertU(commit()); assertJQ( req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, start))); addDocs(10, start, versions); start += 10; assertJQ( req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, start))); assertU(commit()); assertJQ( req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, start))); assertEquals(2, HdfsUpdateLog.getLogList(fs, new Path(logDir)).length); addDocs(105, start, versions); start += 105; assertJQ( req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, start))); assertU(commit()); assertJQ( req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, start))); // previous two logs should be gone now assertEquals(1, 
HdfsUpdateLog.getLogList(fs, new Path(logDir)).length); addDocs(1, start, versions); start += 1; h.close(); createCore(); // trigger recovery, make sure that tlog reference handling is correct // test we can get versions while replay is happening assertJQ( req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, start))); logReplay.release(1000); assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS)); assertJQ( req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, start))); addDocs(105, start, versions); start += 105; assertJQ( req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, start))); assertU(commit()); assertJQ( req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, start))); // previous logs should be gone now assertEquals(1, HdfsUpdateLog.getLogList(fs, new Path(logDir)).length); // // test that a corrupt tlog file doesn't stop us from coming up, or seeing versions before // that tlog file. // addDocs( 1, start, new LinkedList< Long>()); // don't add this to the versions list because we are going to lose it... h.close(); files = HdfsUpdateLog.getLogList(fs, new Path(logDir)); ; Arrays.sort(files); FSDataOutputStream dos = fs.create(new Path(new Path(logDir), files[files.length - 1]), (short) 1); dos.writeUTF("This is a trashed log file that really shouldn't work at all, but we'll see.."); dos.close(); ignoreException("Failure to open existing"); createCore(); // we should still be able to get the list of versions (not including the trashed log file) assertJQ( req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, start))); resetExceptionIgnores(); } finally { DirectUpdateHandler2.commitOnClose = true; UpdateLog.testing_logReplayHook = null; UpdateLog.testing_logReplayFinishHook = null; } }
// in rare circumstances, two logs can be left uncapped (lacking a commit at the end signifying // that all the content in the log was committed) @Test public void testRecoveryMultipleLogs() throws Exception { try { DirectUpdateHandler2.commitOnClose = false; final Semaphore logReplay = new Semaphore(0); final Semaphore logReplayFinish = new Semaphore(0); UpdateLog.testing_logReplayHook = new Runnable() { @Override public void run() { try { assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS)); } catch (Exception e) { throw new RuntimeException(e); } } }; UpdateLog.testing_logReplayFinishHook = new Runnable() { @Override public void run() { logReplayFinish.release(); } }; String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir(); clearIndex(); assertU(commit()); assertU(adoc("id", "AAAAAA")); assertU(adoc("id", "BBBBBB")); assertU(adoc("id", "CCCCCC")); h.close(); String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir)); Arrays.sort(files); String fname = files[files.length - 1]; FSDataOutputStream dos = fs.append(new Path(logDir, files[files.length - 1])); dos.writeLong(0xffffffffffffffffL); dos.writeChars( "This should be appended to a good log file, representing a bad partially written record."); dos.close(); FSDataInputStream dis = fs.open(new Path(logDir, files[files.length - 1])); byte[] content = new byte[(int) dis.available()]; dis.readFully(content); dis.close(); // Now make a newer log file with just the IDs changed. NOTE: this may not work if log format // changes too much! findReplace( "AAAAAA".getBytes(StandardCharsets.UTF_8), "aaaaaa".getBytes(StandardCharsets.UTF_8), content); findReplace( "BBBBBB".getBytes(StandardCharsets.UTF_8), "bbbbbb".getBytes(StandardCharsets.UTF_8), content); findReplace( "CCCCCC".getBytes(StandardCharsets.UTF_8), "cccccc".getBytes(StandardCharsets.UTF_8), content); // WARNING... 
assumes format of .00000n where n is less than 9 long logNumber = Long.parseLong(fname.substring(fname.lastIndexOf(".") + 1)); String fname2 = String.format( Locale.ROOT, UpdateLog.LOG_FILENAME_PATTERN, UpdateLog.TLOG_NAME, logNumber + 1); dos = fs.create(new Path(logDir, fname2), (short) 1); dos.write(content); dos.close(); logReplay.release(1000); logReplayFinish.drainPermits(); ignoreException( "OutOfBoundsException"); // this is what the corrupted log currently produces... subject // to change. createCore(); assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS)); resetExceptionIgnores(); assertJQ(req("q", "*:*"), "/response/numFound==6"); } finally { DirectUpdateHandler2.commitOnClose = true; UpdateLog.testing_logReplayHook = null; UpdateLog.testing_logReplayFinishHook = null; } }
// // test that a corrupt tlog doesn't stop us from coming up // @Test public void testCorruptLog() throws Exception { try { DirectUpdateHandler2.commitOnClose = false; String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir(); clearIndex(); assertU(commit()); assertU(adoc("id", "G1")); assertU(adoc("id", "G2")); assertU(adoc("id", "G3")); h.close(); String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir)); Arrays.sort(files); FSDataOutputStream dos = fs.create(new Path(logDir, files[files.length - 1]), (short) 1); dos.write(new byte[(int) 800]); // zero out file dos.close(); ignoreException( "Failure to open existing log file"); // this is what the corrupted log currently // produces... subject to change. createCore(); resetExceptionIgnores(); // just make sure it responds assertJQ(req("q", "*:*"), "/response/numFound==0"); // // Now test that the bad log file doesn't mess up retrieving latest versions // updateJ( jsonAdd(sdoc("id", "G4", "_version_", "104")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "G5", "_version_", "105")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "G6", "_version_", "106")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); // This currently skips the bad log file and also returns the version of the clearIndex (del // *:*) // assertJQ(req("qt","/get", "getVersions","6"), "/versions==[106,105,104]"); assertJQ(req("qt", "/get", "getVersions", "3"), "/versions==[106,105,104]"); assertU(commit()); assertJQ(req("q", "*:*"), "/response/numFound==3"); // This messes up some other tests (on windows) if we don't remove the bad log. // This *should* hopefully just be because the tests are too fragile and not because of real // bugs - but it should be investigated further. deleteLogs(); } finally { DirectUpdateHandler2.commitOnClose = true; UpdateLog.testing_logReplayHook = null; UpdateLog.testing_logReplayFinishHook = null; } }