コード例 #1
0
ファイル: TestRecoveryHdfs.java プロジェクト: PATRIC3/p3_solr
@ThreadLeakFilters(
    defaultFilters = true,
    filters = {
      BadHdfsThreadsFilter.class // hdfs currently leaks thread(s)
    })
// TODO: longer term this should be combined with TestRecovery somehow ??
public class TestRecoveryHdfs extends SolrTestCaseJ4 {

  // means that we've seen the leader and have version info (i.e. we are a non-leader replica)
  private static String FROM_LEADER = DistribPhase.FROMLEADER.toString();

  private static int timeout =
      60; // acquire timeout in seconds.  change this to a huge number when debugging to prevent
  // threads from advancing.

  private static MiniDFSCluster dfsCluster;

  private static String hdfsUri;

  private static FileSystem fs;

  @BeforeClass
  public static void beforeClass() throws Exception {
    dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath());
    hdfsUri = HdfsTestUtil.getURI(dfsCluster);

    try {
      URI uri = new URI(hdfsUri);
      Configuration conf = HdfsTestUtil.getClientConfiguration(dfsCluster);
      conf.setBoolean("fs.hdfs.impl.disable.cache", true);
      fs = FileSystem.get(uri, conf);
    } catch (IOException | URISyntaxException e) {
      throw new RuntimeException(e);
    }

    System.setProperty("solr.ulog.dir", hdfsUri + "/solr/shard1");

    initCore("solrconfig-tlog.xml", "schema15.xml");
  }

  @AfterClass
  public static void afterClass() throws Exception {
    System.clearProperty("solr.ulog.dir");
    System.clearProperty("test.build.data");
    System.clearProperty("test.cache.data");
    deleteCore();
    IOUtils.closeQuietly(fs);
    fs = null;
    HdfsTestUtil.teardownClass(dfsCluster);

    hdfsDataDir = null;
    dfsCluster = null;
  }

  // since we make up fake versions in these tests, we can get messed up by a DBQ with a real
  // version
  // since Solr can think following updates were reordered.
  @Override
  public void clearIndex() {
    try {
      deleteByQueryAndGetVersion(
          "*:*",
          params("_version_", Long.toString(-Long.MAX_VALUE), DISTRIB_UPDATE_PARAM, FROM_LEADER));
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  @Test
  public void testReplicationFactor() throws Exception {
    clearIndex();

    HdfsUpdateLog ulog = (HdfsUpdateLog) h.getCore().getUpdateHandler().getUpdateLog();

    assertU(commit());
    addAndGetVersion(sdoc("id", "REP1"), null);
    assertU(commit());

    String[] logList = ulog.getLogList(new Path(ulog.getLogDir()));
    boolean foundRep2 = false;
    for (String tl : logList) {
      FileStatus status = fs.getFileStatus(new Path(ulog.getLogDir(), tl));
      if (status.getReplication() == 2) {
        foundRep2 = true;
        break;
      }
    }

    assertTrue("Expected to find tlogs with a replication factor of 2", foundRep2);
  }

  @Test
  public void testLogReplay() throws Exception {
    try {

      DirectUpdateHandler2.commitOnClose = false;
      final Semaphore logReplay = new Semaphore(0);
      final Semaphore logReplayFinish = new Semaphore(0);

      UpdateLog.testing_logReplayHook =
          new Runnable() {
            @Override
            public void run() {
              try {
                assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
              } catch (Exception e) {
                throw new RuntimeException(e);
              }
            }
          };

      UpdateLog.testing_logReplayFinishHook =
          new Runnable() {
            @Override
            public void run() {
              logReplayFinish.release();
            }
          };

      clearIndex();
      assertU(commit());

      Deque<Long> versions = new ArrayDeque<>();
      versions.addFirst(addAndGetVersion(sdoc("id", "A1"), null));
      versions.addFirst(addAndGetVersion(sdoc("id", "A11"), null));
      versions.addFirst(addAndGetVersion(sdoc("id", "A12"), null));
      versions.addFirst(deleteByQueryAndGetVersion("id:A11", null));
      versions.addFirst(addAndGetVersion(sdoc("id", "A13"), null));

      assertJQ(req("q", "*:*"), "/response/numFound==0");

      assertJQ(req("qt", "/get", "getVersions", "" + versions.size()), "/versions==" + versions);

      h.close();
      createCore();
      // Solr should kick this off now
      // h.getCore().getUpdateHandler().getUpdateLog().recoverFromLog();

      // verify that previous close didn't do a commit
      // recovery should be blocked by our hook
      assertJQ(req("q", "*:*"), "/response/numFound==0");

      // make sure we can still access versions after a restart
      assertJQ(req("qt", "/get", "getVersions", "" + versions.size()), "/versions==" + versions);

      // unblock recovery
      logReplay.release(1000);

      // make sure we can still access versions during recovery
      assertJQ(req("qt", "/get", "getVersions", "" + versions.size()), "/versions==" + versions);

      // wait until recovery has finished
      assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));

      assertJQ(req("q", "*:*"), "/response/numFound==3");

      // make sure we can still access versions after recovery
      assertJQ(req("qt", "/get", "getVersions", "" + versions.size()), "/versions==" + versions);

      assertU(adoc("id", "A2"));
      assertU(adoc("id", "A3"));
      assertU(delI("A2"));
      assertU(adoc("id", "A4"));

      assertJQ(req("q", "*:*"), "/response/numFound==3");

      h.close();
      createCore();
      // Solr should kick this off now
      // h.getCore().getUpdateHandler().getUpdateLog().recoverFromLog();

      // wait until recovery has finished
      assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
      assertJQ(req("q", "*:*"), "/response/numFound==5");
      assertJQ(req("q", "id:A2"), "/response/numFound==0");

      // no updates, so insure that recovery does not run
      h.close();
      int permits = logReplay.availablePermits();
      createCore();
      // Solr should kick this off now
      // h.getCore().getUpdateHandler().getUpdateLog().recoverFromLog();

      assertJQ(req("q", "*:*"), "/response/numFound==5");
      Thread.sleep(100);
      assertEquals(
          permits, logReplay.availablePermits()); // no updates, so insure that recovery didn't run

      assertEquals(
          UpdateLog.State.ACTIVE, h.getCore().getUpdateHandler().getUpdateLog().getState());

    } finally {
      DirectUpdateHandler2.commitOnClose = true;
      UpdateLog.testing_logReplayHook = null;
      UpdateLog.testing_logReplayFinishHook = null;
    }
  }

  @Test
  public void testBuffering() throws Exception {

    DirectUpdateHandler2.commitOnClose = false;
    final Semaphore logReplay = new Semaphore(0);
    final Semaphore logReplayFinish = new Semaphore(0);

    UpdateLog.testing_logReplayHook =
        new Runnable() {
          @Override
          public void run() {
            try {
              assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
            } catch (Exception e) {
              throw new RuntimeException(e);
            }
          }
        };

    UpdateLog.testing_logReplayFinishHook =
        new Runnable() {
          @Override
          public void run() {
            logReplayFinish.release();
          }
        };

    SolrQueryRequest req = req();
    UpdateHandler uhandler = req.getCore().getUpdateHandler();
    UpdateLog ulog = uhandler.getUpdateLog();

    try {
      clearIndex();
      assertU(commit());

      assertEquals(UpdateLog.State.ACTIVE, ulog.getState());
      ulog.bufferUpdates();
      assertEquals(UpdateLog.State.BUFFERING, ulog.getState());
      Future<UpdateLog.RecoveryInfo> rinfoFuture = ulog.applyBufferedUpdates();
      assertTrue(rinfoFuture == null);
      assertEquals(UpdateLog.State.ACTIVE, ulog.getState());

      ulog.bufferUpdates();
      assertEquals(UpdateLog.State.BUFFERING, ulog.getState());

      // simulate updates from a leader
      updateJ(
          jsonAdd(sdoc("id", "B1", "_version_", "1010")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "B11", "_version_", "1015")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonDelQ("id:B1 id:B11 id:B2 id:B3"),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-1017"));
      updateJ(
          jsonAdd(sdoc("id", "B2", "_version_", "1020")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "B3", "_version_", "1030")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      deleteAndGetVersion("B1", params(DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-2010"));

      assertJQ(
          req("qt", "/get", "getVersions", "6"),
          "=={'versions':[-2010,1030,1020,-1017,1015,1010]}");

      assertU(commit());

      assertJQ(
          req("qt", "/get", "getVersions", "6"),
          "=={'versions':[-2010,1030,1020,-1017,1015,1010]}");

      // updates should be buffered, so we should not see any results yet.
      assertJQ(req("q", "*:*"), "/response/numFound==0");

      // real-time get should also not show anything (this could change in the future,
      // but it's currently used for validating version numbers too, so it would
      // be bad for updates to be visible if we're just buffering.
      assertJQ(req("qt", "/get", "id", "B3"), "=={'doc':null}");

      rinfoFuture = ulog.applyBufferedUpdates();
      assertTrue(rinfoFuture != null);

      assertEquals(UpdateLog.State.APPLYING_BUFFERED, ulog.getState());

      logReplay.release(1000);

      UpdateLog.RecoveryInfo rinfo = rinfoFuture.get();
      assertEquals(UpdateLog.State.ACTIVE, ulog.getState());

      assertJQ(
          req("qt", "/get", "getVersions", "6"),
          "=={'versions':[-2010,1030,1020,-1017,1015,1010]}");

      assertJQ(req("q", "*:*"), "/response/numFound==2");

      // move back to recovering
      ulog.bufferUpdates();
      assertEquals(UpdateLog.State.BUFFERING, ulog.getState());

      Long ver = getVer(req("qt", "/get", "id", "B3"));
      assertEquals(1030L, ver.longValue());

      // add a reordered doc that shouldn't overwrite one in the index
      updateJ(
          jsonAdd(sdoc("id", "B3", "_version_", "3")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

      // reorder two buffered updates
      updateJ(
          jsonAdd(sdoc("id", "B4", "_version_", "1040")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      deleteAndGetVersion(
          "B4",
          params(
              DISTRIB_UPDATE_PARAM,
              FROM_LEADER,
              "_version_",
              "-940")); // this update should not take affect
      updateJ(
          jsonAdd(sdoc("id", "B6", "_version_", "1060")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "B5", "_version_", "1050")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "B8", "_version_", "1080")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

      // test that delete by query is at least buffered along with everything else so it will delete
      // the
      // currently buffered id:8 (even if it doesn't currently support versioning)
      updateJ(
          "{\"delete\": { \"query\":\"id:B2 OR id:B8\" }}",
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-3000"));

      assertJQ(
          req("qt", "/get", "getVersions", "13"),
          "=={'versions':[-3000,1080,1050,1060,-940,1040,3,-2010,1030,1020,-1017,1015,1010]}" // the
          // "3"
          // appears because versions aren't checked while buffering
          );

      logReplay.drainPermits();
      rinfoFuture = ulog.applyBufferedUpdates();
      assertTrue(rinfoFuture != null);
      assertEquals(UpdateLog.State.APPLYING_BUFFERED, ulog.getState());

      // apply a single update
      logReplay.release(1);

      // now add another update
      updateJ(
          jsonAdd(sdoc("id", "B7", "_version_", "1070")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

      // a reordered update that should be dropped
      deleteAndGetVersion("B5", params(DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-950"));

      deleteAndGetVersion("B6", params(DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-2060"));

      logReplay.release(1000);
      UpdateLog.RecoveryInfo recInfo = rinfoFuture.get();

      assertJQ(
          req("q", "*:*", "sort", "id asc", "fl", "id,_version_"),
          "/response/docs==["
              + "{'id':'B3','_version_':1030}"
              + ",{'id':'B4','_version_':1040}"
              + ",{'id':'B5','_version_':1050}"
              + ",{'id':'B7','_version_':1070}"
              + "]");

      assertEquals(1, recInfo.deleteByQuery);

      assertEquals(
          UpdateLog.State.ACTIVE, ulog.getState()); // leave each test method in a good state
    } finally {
      DirectUpdateHandler2.commitOnClose = true;
      UpdateLog.testing_logReplayHook = null;
      UpdateLog.testing_logReplayFinishHook = null;

      req().close();
    }
  }

  @Test
  @Ignore("HDFS-3107: no truncate support yet")
  public void testDropBuffered() throws Exception {

    DirectUpdateHandler2.commitOnClose = false;
    final Semaphore logReplay = new Semaphore(0);
    final Semaphore logReplayFinish = new Semaphore(0);

    UpdateLog.testing_logReplayHook =
        new Runnable() {
          @Override
          public void run() {
            try {
              assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
            } catch (Exception e) {
              throw new RuntimeException(e);
            }
          }
        };

    UpdateLog.testing_logReplayFinishHook =
        new Runnable() {
          @Override
          public void run() {
            logReplayFinish.release();
          }
        };

    SolrQueryRequest req = req();
    UpdateHandler uhandler = req.getCore().getUpdateHandler();
    UpdateLog ulog = uhandler.getUpdateLog();

    try {
      clearIndex();
      assertU(commit());

      assertEquals(UpdateLog.State.ACTIVE, ulog.getState());
      ulog.bufferUpdates();
      assertEquals(UpdateLog.State.BUFFERING, ulog.getState());
      Future<UpdateLog.RecoveryInfo> rinfoFuture = ulog.applyBufferedUpdates();
      assertTrue(rinfoFuture == null);
      assertEquals(UpdateLog.State.ACTIVE, ulog.getState());

      ulog.bufferUpdates();
      assertEquals(UpdateLog.State.BUFFERING, ulog.getState());

      // simulate updates from a leader
      updateJ(
          jsonAdd(sdoc("id", "C1", "_version_", "101")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "C2", "_version_", "102")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "C3", "_version_", "103")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

      assertTrue(ulog.dropBufferedUpdates());
      ulog.bufferUpdates();
      updateJ(
          jsonAdd(sdoc("id", "C4", "_version_", "104")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "C5", "_version_", "105")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

      logReplay.release(1000);
      rinfoFuture = ulog.applyBufferedUpdates();
      UpdateLog.RecoveryInfo rinfo = rinfoFuture.get();
      assertEquals(2, rinfo.adds);

      assertJQ(req("qt", "/get", "getVersions", "2"), "=={'versions':[105,104]}");

      // this time add some docs first before buffering starts (so tlog won't be at pos 0)
      updateJ(
          jsonAdd(sdoc("id", "C100", "_version_", "200")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "C101", "_version_", "201")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

      ulog.bufferUpdates();
      updateJ(
          jsonAdd(sdoc("id", "C103", "_version_", "203")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "C104", "_version_", "204")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

      assertTrue(ulog.dropBufferedUpdates());
      ulog.bufferUpdates();
      updateJ(
          jsonAdd(sdoc("id", "C105", "_version_", "205")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "C106", "_version_", "206")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

      rinfoFuture = ulog.applyBufferedUpdates();
      rinfo = rinfoFuture.get();
      assertEquals(2, rinfo.adds);

      assertJQ(
          req("q", "*:*", "sort", "_version_ asc", "fl", "id,_version_"),
          "/response/docs==["
              + "{'id':'C4','_version_':104}"
              + ",{'id':'C5','_version_':105}"
              + ",{'id':'C100','_version_':200}"
              + ",{'id':'C101','_version_':201}"
              + ",{'id':'C105','_version_':205}"
              + ",{'id':'C106','_version_':206}"
              + "]");

      assertJQ(req("qt", "/get", "getVersions", "6"), "=={'versions':[206,205,201,200,105,104]}");

      ulog.bufferUpdates();
      assertEquals(UpdateLog.State.BUFFERING, ulog.getState());
      updateJ(
          jsonAdd(sdoc("id", "C301", "_version_", "998")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "C302", "_version_", "999")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      assertTrue(ulog.dropBufferedUpdates());

      // make sure we can overwrite with a lower version
      // TODO: is this functionality needed?
      updateJ(
          jsonAdd(sdoc("id", "C301", "_version_", "301")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "C302", "_version_", "302")),
          params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

      assertU(commit());

      assertJQ(req("qt", "/get", "getVersions", "2"), "=={'versions':[302,301]}");

      assertJQ(
          req("q", "*:*", "sort", "_version_ desc", "fl", "id,_version_", "rows", "2"),
          "/response/docs==["
              + "{'id':'C302','_version_':302}"
              + ",{'id':'C301','_version_':301}"
              + "]");

      updateJ(
          jsonAdd(sdoc("id", "C2", "_version_", "302")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

      assertEquals(
          UpdateLog.State.ACTIVE, ulog.getState()); // leave each test method in a good state
    } finally {
      DirectUpdateHandler2.commitOnClose = true;
      UpdateLog.testing_logReplayHook = null;
      UpdateLog.testing_logReplayFinishHook = null;

      req().close();
    }
  }

  @Test
  public void testBufferingFlags() throws Exception {

    DirectUpdateHandler2.commitOnClose = false;
    final Semaphore logReplayFinish = new Semaphore(0);

    UpdateLog.testing_logReplayFinishHook =
        new Runnable() {
          @Override
          public void run() {
            logReplayFinish.release();
          }
        };

    SolrQueryRequest req = req();
    UpdateHandler uhandler = req.getCore().getUpdateHandler();
    UpdateLog ulog = uhandler.getUpdateLog();

    try {
      clearIndex();
      assertU(commit());

      assertEquals(UpdateLog.State.ACTIVE, ulog.getState());
      ulog.bufferUpdates();

      // simulate updates from a leader
      updateJ(
          jsonAdd(sdoc("id", "Q1", "_version_", "101")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "Q2", "_version_", "102")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "Q3", "_version_", "103")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      assertEquals(UpdateLog.State.BUFFERING, ulog.getState());

      req.close();
      h.close();
      createCore();

      req = req();
      uhandler = req.getCore().getUpdateHandler();
      ulog = uhandler.getUpdateLog();

      logReplayFinish.acquire(); // wait for replay to finish

      assertTrue(
          (ulog.getStartingOperation() & UpdateLog.FLAG_GAP)
              != 0); // since we died while buffering, we should see this last

      //
      // Try again to ensure that the previous log replay didn't wipe out our flags
      //

      req.close();
      h.close();
      createCore();

      req = req();
      uhandler = req.getCore().getUpdateHandler();
      ulog = uhandler.getUpdateLog();

      assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) != 0);

      // now do some normal non-buffered adds
      updateJ(
          jsonAdd(sdoc("id", "Q4", "_version_", "114")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "Q5", "_version_", "115")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "Q6", "_version_", "116")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      assertU(commit());

      req.close();
      h.close();
      createCore();

      req = req();
      uhandler = req.getCore().getUpdateHandler();
      ulog = uhandler.getUpdateLog();

      assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) == 0);

      ulog.bufferUpdates();
      // simulate receiving no updates
      ulog.applyBufferedUpdates();
      updateJ(
          jsonAdd(sdoc("id", "Q7", "_version_", "117")),
          params(
              DISTRIB_UPDATE_PARAM,
              FROM_LEADER)); // do another add to make sure flags are back to normal

      req.close();
      h.close();
      createCore();

      req = req();
      uhandler = req.getCore().getUpdateHandler();
      ulog = uhandler.getUpdateLog();

      assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) == 0); // check flags on Q7

      logReplayFinish.acquire();
      assertEquals(
          UpdateLog.State.ACTIVE, ulog.getState()); // leave each test method in a good state
    } finally {
      DirectUpdateHandler2.commitOnClose = true;
      UpdateLog.testing_logReplayHook = null;
      UpdateLog.testing_logReplayFinishHook = null;

      req().close();
    }
  }

  // make sure that on a restart, versions don't start too low
  @Test
  public void testVersionsOnRestart() throws Exception {
    clearIndex();
    assertU(commit());

    assertU(adoc("id", "D1", "val_i", "1"));
    assertU(adoc("id", "D2", "val_i", "1"));
    assertU(commit());
    long v1 = getVer(req("q", "id:D1"));
    long v1a = getVer(req("q", "id:D2"));

    h.close();
    createCore();

    assertU(adoc("id", "D1", "val_i", "2"));
    assertU(commit());
    long v2 = getVer(req("q", "id:D1"));

    assert (v2 > v1);

    assertJQ(req("qt", "/get", "getVersions", "2"), "/versions==[" + v2 + "," + v1a + "]");
  }

  // make sure that log isn't needlessly replayed after a clean close
  @Test
  public void testCleanShutdown() throws Exception {
    DirectUpdateHandler2.commitOnClose = true;
    final Semaphore logReplay = new Semaphore(0);
    final Semaphore logReplayFinish = new Semaphore(0);

    UpdateLog.testing_logReplayHook =
        new Runnable() {
          @Override
          public void run() {
            try {
              assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
            } catch (Exception e) {
              throw new RuntimeException(e);
            }
          }
        };

    UpdateLog.testing_logReplayFinishHook =
        new Runnable() {
          @Override
          public void run() {
            logReplayFinish.release();
          }
        };

    SolrQueryRequest req = req();
    UpdateHandler uhandler = req.getCore().getUpdateHandler();
    UpdateLog ulog = uhandler.getUpdateLog();

    try {
      clearIndex();
      assertU(commit());

      assertU(adoc("id", "E1", "val_i", "1"));
      assertU(adoc("id", "E2", "val_i", "1"));

      // set to a high enough number so this test won't hang on a bug
      logReplay.release(10);

      h.close();
      createCore();

      // make sure the docs got committed
      assertJQ(req("q", "*:*"), "/response/numFound==2");

      // make sure no replay happened
      assertEquals(10, logReplay.availablePermits());

    } finally {
      DirectUpdateHandler2.commitOnClose = true;
      UpdateLog.testing_logReplayHook = null;
      UpdateLog.testing_logReplayFinishHook = null;

      req().close();
    }
  }

  private void addDocs(int nDocs, int start, LinkedList<Long> versions) throws Exception {
    for (int i = 0; i < nDocs; i++) {
      versions.addFirst(addAndGetVersion(sdoc("id", Integer.toString(start + nDocs)), null));
    }
  }

  @Test
  public void testRemoveOldLogs() throws Exception {
    try {
      DirectUpdateHandler2.commitOnClose = false;
      final Semaphore logReplay = new Semaphore(0);
      final Semaphore logReplayFinish = new Semaphore(0);

      UpdateLog.testing_logReplayHook =
          new Runnable() {
            @Override
            public void run() {
              try {
                assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
              } catch (Exception e) {
                throw new RuntimeException(e);
              }
            }
          };

      UpdateLog.testing_logReplayFinishHook =
          new Runnable() {
            @Override
            public void run() {
              logReplayFinish.release();
            }
          };

      clearIndex();
      assertU(commit());

      String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();

      h.close();

      String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
      for (String file : files) {
        fs.delete(new Path(logDir, file), false);
      }

      assertEquals(0, HdfsUpdateLog.getLogList(fs, new Path(logDir)).length);

      createCore();

      int start = 0;
      int maxReq = 50;

      LinkedList<Long> versions = new LinkedList<>();
      addDocs(10, start, versions);
      start += 10;
      assertJQ(
          req("qt", "/get", "getVersions", "" + maxReq),
          "/versions==" + versions.subList(0, Math.min(maxReq, start)));
      assertU(commit());
      assertJQ(
          req("qt", "/get", "getVersions", "" + maxReq),
          "/versions==" + versions.subList(0, Math.min(maxReq, start)));

      addDocs(10, start, versions);
      start += 10;
      assertJQ(
          req("qt", "/get", "getVersions", "" + maxReq),
          "/versions==" + versions.subList(0, Math.min(maxReq, start)));
      assertU(commit());
      assertJQ(
          req("qt", "/get", "getVersions", "" + maxReq),
          "/versions==" + versions.subList(0, Math.min(maxReq, start)));

      assertEquals(2, HdfsUpdateLog.getLogList(fs, new Path(logDir)).length);

      addDocs(105, start, versions);
      start += 105;
      assertJQ(
          req("qt", "/get", "getVersions", "" + maxReq),
          "/versions==" + versions.subList(0, Math.min(maxReq, start)));
      assertU(commit());
      assertJQ(
          req("qt", "/get", "getVersions", "" + maxReq),
          "/versions==" + versions.subList(0, Math.min(maxReq, start)));

      // previous two logs should be gone now
      assertEquals(1, HdfsUpdateLog.getLogList(fs, new Path(logDir)).length);

      addDocs(1, start, versions);
      start += 1;
      h.close();
      createCore(); // trigger recovery, make sure that tlog reference handling is correct

      // test we can get versions while replay is happening
      assertJQ(
          req("qt", "/get", "getVersions", "" + maxReq),
          "/versions==" + versions.subList(0, Math.min(maxReq, start)));

      logReplay.release(1000);
      assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));

      assertJQ(
          req("qt", "/get", "getVersions", "" + maxReq),
          "/versions==" + versions.subList(0, Math.min(maxReq, start)));

      addDocs(105, start, versions);
      start += 105;
      assertJQ(
          req("qt", "/get", "getVersions", "" + maxReq),
          "/versions==" + versions.subList(0, Math.min(maxReq, start)));
      assertU(commit());
      assertJQ(
          req("qt", "/get", "getVersions", "" + maxReq),
          "/versions==" + versions.subList(0, Math.min(maxReq, start)));

      // previous logs should be gone now
      assertEquals(1, HdfsUpdateLog.getLogList(fs, new Path(logDir)).length);

      //
      // test that a corrupt tlog file doesn't stop us from coming up, or seeing versions before
      // that tlog file.
      //
      addDocs(
          1,
          start,
          new LinkedList<
              Long>()); // don't add this to the versions list because we are going to lose it...
      h.close();
      files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
      ;
      Arrays.sort(files);

      FSDataOutputStream dos =
          fs.create(new Path(new Path(logDir), files[files.length - 1]), (short) 1);
      dos.writeUTF("This is a trashed log file that really shouldn't work at all, but we'll see..");
      dos.close();

      ignoreException("Failure to open existing");
      createCore();
      // we should still be able to get the list of versions (not including the trashed log file)
      assertJQ(
          req("qt", "/get", "getVersions", "" + maxReq),
          "/versions==" + versions.subList(0, Math.min(maxReq, start)));
      resetExceptionIgnores();

    } finally {
      DirectUpdateHandler2.commitOnClose = true;
      UpdateLog.testing_logReplayHook = null;
      UpdateLog.testing_logReplayFinishHook = null;
    }
  }

  //
  // test that a partially written last tlog entry (that will cause problems for both reverse
  // reading and for
  // log replay) doesn't stop us from coming up, and from recovering the documents that were not cut
  // off.
  //

  @Test
  public void testTruncatedLog() throws Exception {
    try {
      DirectUpdateHandler2.commitOnClose = false;
      final Semaphore logReplay = new Semaphore(0);
      final Semaphore logReplayFinish = new Semaphore(0);

      UpdateLog.testing_logReplayHook =
          new Runnable() {
            @Override
            public void run() {
              try {
                assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
              } catch (Exception e) {
                throw new RuntimeException(e);
              }
            }
          };

      UpdateLog.testing_logReplayFinishHook =
          new Runnable() {
            @Override
            public void run() {
              logReplayFinish.release();
            }
          };

      String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();

      clearIndex();
      assertU(commit());

      assertU(adoc("id", "F1"));
      assertU(adoc("id", "F2"));
      assertU(adoc("id", "F3"));

      h.close();

      String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
      Arrays.sort(files);

      FSDataOutputStream dos = fs.append(new Path(logDir, files[files.length - 1]));

      dos.writeLong(0xffffffffffffffffL);
      dos.writeChars(
          "This should be appended to a good log file, representing a bad partially written record.");
      dos.close();

      logReplay.release(1000);
      logReplayFinish.drainPermits();
      ignoreException(
          "OutOfBoundsException"); // this is what the corrupted log currently produces... subject
      // to change.
      createCore();
      assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
      resetExceptionIgnores();
      assertJQ(req("q", "*:*"), "/response/numFound==3");

      //
      // Now test that the bad log file doesn't mess up retrieving latest versions
      //

      updateJ(
          jsonAdd(sdoc("id", "F4", "_version_", "104")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "F5", "_version_", "105")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "F6", "_version_", "106")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

      // This currently skips the bad log file and also returns the version of the clearIndex (del
      // *:*)
      // assertJQ(req("qt","/get", "getVersions","6"), "/versions==[106,105,104]");
      assertJQ(req("qt", "/get", "getVersions", "3"), "/versions==[106,105,104]");

    } finally {
      DirectUpdateHandler2.commitOnClose = true;
      UpdateLog.testing_logReplayHook = null;
      UpdateLog.testing_logReplayFinishHook = null;
    }
  }

  //
  // test that a corrupt tlog doesn't stop us from coming up
  //
  @Test
  public void testCorruptLog() throws Exception {
    try {
      DirectUpdateHandler2.commitOnClose = false;

      String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();

      clearIndex();
      assertU(commit());

      assertU(adoc("id", "G1"));
      assertU(adoc("id", "G2"));
      assertU(adoc("id", "G3"));

      h.close();

      String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
      Arrays.sort(files);

      FSDataOutputStream dos = fs.create(new Path(logDir, files[files.length - 1]), (short) 1);
      dos.write(new byte[(int) 800]); // zero out file
      dos.close();

      ignoreException(
          "Failure to open existing log file"); // this is what the corrupted log currently
      // produces... subject to change.
      createCore();
      resetExceptionIgnores();

      // just make sure it responds
      assertJQ(req("q", "*:*"), "/response/numFound==0");

      //
      // Now test that the bad log file doesn't mess up retrieving latest versions
      //

      updateJ(
          jsonAdd(sdoc("id", "G4", "_version_", "104")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "G5", "_version_", "105")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
      updateJ(
          jsonAdd(sdoc("id", "G6", "_version_", "106")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

      // This currently skips the bad log file and also returns the version of the clearIndex (del
      // *:*)
      // assertJQ(req("qt","/get", "getVersions","6"), "/versions==[106,105,104]");
      assertJQ(req("qt", "/get", "getVersions", "3"), "/versions==[106,105,104]");

      assertU(commit());

      assertJQ(req("q", "*:*"), "/response/numFound==3");

      // This messes up some other tests (on windows) if we don't remove the bad log.
      // This *should* hopefully just be because the tests are too fragile and not because of real
      // bugs - but it should be investigated further.
      deleteLogs();

    } finally {
      DirectUpdateHandler2.commitOnClose = true;
      UpdateLog.testing_logReplayHook = null;
      UpdateLog.testing_logReplayFinishHook = null;
    }
  }

  // in rare circumstances, two logs can be left uncapped (lacking a commit at the end signifying
  // that all the content in the log was committed)
  @Test
  public void testRecoveryMultipleLogs() throws Exception {
    try {
      DirectUpdateHandler2.commitOnClose = false;
      final Semaphore logReplay = new Semaphore(0);
      final Semaphore logReplayFinish = new Semaphore(0);

      UpdateLog.testing_logReplayHook =
          new Runnable() {
            @Override
            public void run() {
              try {
                assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
              } catch (Exception e) {
                throw new RuntimeException(e);
              }
            }
          };

      UpdateLog.testing_logReplayFinishHook =
          new Runnable() {
            @Override
            public void run() {
              logReplayFinish.release();
            }
          };

      String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();

      clearIndex();
      assertU(commit());

      assertU(adoc("id", "AAAAAA"));
      assertU(adoc("id", "BBBBBB"));
      assertU(adoc("id", "CCCCCC"));

      h.close();
      String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
      Arrays.sort(files);
      String fname = files[files.length - 1];

      FSDataOutputStream dos = fs.append(new Path(logDir, files[files.length - 1]));
      dos.writeLong(0xffffffffffffffffL);
      dos.writeChars(
          "This should be appended to a good log file, representing a bad partially written record.");
      dos.close();

      FSDataInputStream dis = fs.open(new Path(logDir, files[files.length - 1]));
      byte[] content = new byte[(int) dis.available()];

      dis.readFully(content);

      dis.close();

      // Now make a newer log file with just the IDs changed.  NOTE: this may not work if log format
      // changes too much!
      findReplace(
          "AAAAAA".getBytes(StandardCharsets.UTF_8),
          "aaaaaa".getBytes(StandardCharsets.UTF_8),
          content);
      findReplace(
          "BBBBBB".getBytes(StandardCharsets.UTF_8),
          "bbbbbb".getBytes(StandardCharsets.UTF_8),
          content);
      findReplace(
          "CCCCCC".getBytes(StandardCharsets.UTF_8),
          "cccccc".getBytes(StandardCharsets.UTF_8),
          content);

      // WARNING... assumes format of .00000n where n is less than 9
      long logNumber = Long.parseLong(fname.substring(fname.lastIndexOf(".") + 1));
      String fname2 =
          String.format(
              Locale.ROOT, UpdateLog.LOG_FILENAME_PATTERN, UpdateLog.TLOG_NAME, logNumber + 1);

      dos = fs.create(new Path(logDir, fname2), (short) 1);
      dos.write(content);
      dos.close();

      logReplay.release(1000);
      logReplayFinish.drainPermits();
      ignoreException(
          "OutOfBoundsException"); // this is what the corrupted log currently produces... subject
      // to change.
      createCore();
      assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
      resetExceptionIgnores();
      assertJQ(req("q", "*:*"), "/response/numFound==6");

    } finally {
      DirectUpdateHandler2.commitOnClose = true;
      UpdateLog.testing_logReplayHook = null;
      UpdateLog.testing_logReplayFinishHook = null;
    }
  }

  // NOTE: replacement must currently be same size
  private static void findReplace(byte[] from, byte[] to, byte[] data) {
    int idx = -from.length;
    for (; ; ) {
      idx = indexOf(from, data, idx + from.length); // skip over previous match
      if (idx < 0) break;
      for (int i = 0; i < to.length; i++) {
        data[idx + i] = to[i];
      }
    }
  }

  private static int indexOf(byte[] target, byte[] data, int start) {
    outer:
    for (int i = start; i < data.length - target.length; i++) {
      for (int j = 0; j < target.length; j++) {
        if (data[i + j] != target[j]) continue outer;
      }
      return i;
    }
    return -1;
  }

  // stops the core, removes the transaction logs, restarts the core.
  void deleteLogs() throws Exception {
    String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();

    h.close();

    try {
      String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
      for (String file : files) {
        // new File(logDir, file).delete();
        fs.delete(new Path(logDir, file), false);
      }

      assertEquals(0, HdfsUpdateLog.getLogList(fs, new Path(logDir)).length);
    } finally {
      // make sure we create the core again, even if the assert fails so it won't mess
      // up the next test.
      createCore();
      assertJQ(req("q", "*:*"), "/response/numFound=="); // ensure it works
    }
  }

  private static Long getVer(SolrQueryRequest req) throws Exception {
    String response = JQ(req);
    Map rsp = (Map) ObjectBuilder.fromJSON(response);
    Map doc = null;
    if (rsp.containsKey("doc")) {
      doc = (Map) rsp.get("doc");
    } else if (rsp.containsKey("docs")) {
      List lst = (List) rsp.get("docs");
      if (lst.size() > 0) {
        doc = (Map) lst.get(0);
      }
    } else if (rsp.containsKey("response")) {
      Map responseMap = (Map) rsp.get("response");
      List lst = (List) responseMap.get("docs");
      if (lst.size() > 0) {
        doc = (Map) lst.get(0);
      }
    }

    if (doc == null) return null;

    return (Long) doc.get("_version_");
  }
}