public void startNodeRepairAndWaitFinish(boolean canResume, boolean crossVdc) throws Exception { if (canResume && getLastSucceededRepairStatus(true) != null) { log.info("Resume last successful repair"); return; } DbRepairStatus state = null; for (int i = 0; i < DB_REPAIR_MAX_RETRY_COUNT; i++) { startNodeRepair(canResume, crossVdc); state = waitDbRepairFinish(true); if (state != null) { break; } // It could be cluster state changed, so we have to wait for ANY repair to finish here // We don't care if it's NotFound, Success, Or Failed for other state, repair for current // state is failed anyway. log.error( "No db repair found for current cluster state, waiting for possible stale repair to finish"); state = waitDbRepairFinish(false); // Trigger a new db repair log.info("Trigger a new db repair for current cluster state"); } if (state.getStatus() == DbRepairStatus.Status.FAILED) { log.error("Db node repair started at {} is failed", state.getStartTime()); throw new IllegalStateException("Repair failed"); } log.info("Db node repair started at {} is finished", state.getStartTime()); }
/**
 * Polls the most recent db repair status until it is no longer in progress.
 *
 * <p>The status is fetched via {@code getLastRepairStatus(forCurrentStateOnly)}. While it reports
 * {@code IN_PROGRESS} the method sleeps 1000 ms between polls and logs the progress percentage
 * each time it differs from the previously logged value.
 *
 * @param forCurrentStateOnly forwarded to {@code getLastRepairStatus}
 * @return the first status seen that is not {@code IN_PROGRESS}, or {@code null} as soon as no
 *     status is found
 * @throws Exception if the status lookup fails or the sleep is interrupted
 */
public DbRepairStatus waitDbRepairFinish(boolean forCurrentStateOnly) throws Exception {
    final String scopeLabel = String.valueOf(forCurrentStateOnly);
    int reportedProgress = -1;

    while (true) {
        DbRepairStatus current = getLastRepairStatus(forCurrentStateOnly);

        if (current == null) {
            log.info("No db repair found(forCurrentStateOnly={})", scopeLabel);
            return null;
        }

        boolean stillRunning = current.getStatus() == DbRepairStatus.Status.IN_PROGRESS;
        if (!stillRunning) {
            log.info(
                "Db repair(forCurrentStateOnly={}) finished with state: {}",
                scopeLabel,
                current.toString());
            return current;
        }

        // Log only when the percentage moves, to keep the once-per-second poll quiet.
        int progress = current.getProgress();
        if (progress != reportedProgress) {
            log.info("Db repair started at {} is in progress {}%", current.getStartTime(), progress);
            reportedProgress = progress;
        }

        Thread.sleep(1000);
    }
}