/** * This variant delays the formatting of the string message until it is actually logged * * @param now * @param level a {@link Level debug level} * @param cause evidentiary exception * @param stemformat a {@link String#format(String, Object...) string format} * @param args format arguments */ public void log(long now, Level level, Throwable cause, String stemformat, Object... args) { if (now - m_lastLogTime > m_maxLogIntervalMillis) { synchronized (this) { if (now - m_lastLogTime > m_maxLogIntervalMillis) { String message = formatMessage(cause, stemformat, args); switch (level) { case DEBUG: m_logger.debug(message); break; case ERROR: m_logger.error(message); break; case FATAL: m_logger.fatal(message); break; case INFO: m_logger.info(message); break; case TRACE: m_logger.trace(message); break; case WARN: m_logger.warn(message); break; } m_lastLogTime = now; } } } }
public void log(String message, long now, Level level) { if (now - m_lastLogTime > m_maxLogIntervalMillis) { synchronized (this) { if (now - m_lastLogTime > m_maxLogIntervalMillis) { switch (level) { case DEBUG: m_logger.debug(message); break; case ERROR: m_logger.error(message); break; case FATAL: m_logger.fatal(message); break; case INFO: m_logger.info(message); break; case TRACE: m_logger.trace(message); break; case WARN: m_logger.warn(message); break; } m_lastLogTime = now; } } } }
public void runDDL(String ddl) { try { // LOG.info("Executing " + ddl); Statement stmt = dbconn.createStatement(); /*boolean success =*/ stmt.execute(ddl); SQLWarning warn = stmt.getWarnings(); if (warn != null) sqlLog.warn(warn.getMessage()); // LOG.info("SQL DDL execute result: " + (success ? "true" : "false")); } catch (SQLException e) { hostLog.l7dlog(Level.ERROR, LogKeys.host_Backend_RunDDLFailed.name(), new Object[] {ddl}, e); } }
void shutdown() { try { if (m_hsql != null) { HsqlBackend.shutdownInstance(); } if (m_ee != null) { m_ee.release(); } if (m_snapshotter != null) { m_snapshotter.shutdown(); } } catch (InterruptedException e) { hostLog.warn("Interrupted shutdown execution site.", e); } }
/** * Connect to a single server with retry. Limited exponential backoff. No timeout. This will run * until the process is killed if it's not able to connect. * * @param server hostname:port or just hostname (hostname can be ip). */ private void connectToOneServerWithRetry(String server) { int sleep = 1000; while (!shutdown.get()) { try { client.createConnection(server); activeConnections.incrementAndGet(); log.info(String.format("Connected to VoltDB node at: %s.", server)); break; } catch (Exception e) { log.warn( String.format( "Connection to " + server + " failed - retrying in %d second(s).", sleep / 1000)); try { Thread.sleep(sleep); } catch (Exception interruted) { } if (sleep < 8000) sleep += sleep; } } }
private void processFragmentResponseDependencies(FragmentResponseMessage response) { int depCount = response.getTableCount(); for (int i = 0; i < depCount; i++) { int dependencyId = response.getTableDependencyIdAtIndex(i); VoltTable payload = response.getTableAtIndex(i); assert (payload != null); // if we're getting a dependency, i hope we know about it assert (m_missingDependencies != null); WorkUnit w = m_missingDependencies.get(dependencyId); if (w == null) { String msg = "Unable to find WorkUnit for dependency: " + dependencyId + " as part of TXN: " + txnId + " received from execution site: " + response.getExecutorSiteId(); hostLog.warn(msg); // throw new FragmentFailureException(); return; } // if the node is recovering, it doesn't matter if the payload matches if (response.isRecovering()) { w.putDummyDependency(dependencyId, response.getExecutorSiteId()); } else { w.putDependency( dependencyId, response.getExecutorSiteId(), payload, m_site.getSiteTracker()); } if (w.allDependenciesSatisfied()) { handleWorkUnitComplete(w); } } }
private void createSetupIv2( final String file_path, final String file_nonce, SnapshotFormat format, final long txnId, final Map<Integer, Long> partitionTransactionIds, String data, final SystemProcedureExecutionContext context, final VoltTable result, Map<String, Map<Integer, Pair<Long, Long>>> exportSequenceNumbers, SiteTracker tracker, HashinatorSnapshotData hashinatorData, long timestamp) { JSONObject jsData = null; if (data != null && !data.isEmpty()) { try { jsData = new JSONObject(data); } catch (JSONException e) { SNAP_LOG.error(String.format("JSON exception on snapshot data \"%s\".", data), e); } } SnapshotWritePlan plan; if (format == SnapshotFormat.NATIVE) { plan = new NativeSnapshotWritePlan(); } else if (format == SnapshotFormat.CSV) { plan = new CSVSnapshotWritePlan(); } else if (format == SnapshotFormat.STREAM) { plan = new StreamSnapshotWritePlan(); } else if (format == SnapshotFormat.INDEX) { plan = new IndexSnapshotWritePlan(); } else { throw new RuntimeException("BAD BAD BAD"); } final Callable<Boolean> deferredSetup = plan.createSetup( file_path, file_nonce, txnId, partitionTransactionIds, jsData, context, result, exportSequenceNumbers, tracker, hashinatorData, timestamp); m_deferredSetupFuture = VoltDB.instance() .submitSnapshotIOWork( new DeferredSnapshotSetup(plan, deferredSetup, txnId, partitionTransactionIds)); synchronized (m_createLock) { // Seems like this should be cleared out just in case // Log if there is actually anything to clear since it is unexpected if (!m_taskListsForHSIds.isEmpty()) { SNAP_LOG.warn("Found lingering snapshot tasks while setting up a snapshot"); } m_taskListsForHSIds.clear(); m_createSuccess.set(true); m_createResult.set(result); m_taskListsForHSIds.putAll(plan.getTaskListsForHSIds()); // HACK HACK HACK. If the task list is empty, this host has no work to do for // this snapshot. We're going to create an empty list of tasks for one of the sites to do // so that we'll have a SnapshotSiteProcessor which will do the logSnapshotCompleteToZK. if (m_taskListsForHSIds.isEmpty()) { SNAP_LOG.debug( "Node had no snapshot work to do. Creating a null task to drive completion."); m_taskListsForHSIds.put(context.getSiteId(), new ArrayDeque<SnapshotTableTask>()); } SNAP_LOG.debug( "Planned tasks: " + CoreUtils.hsIdCollectionToString(plan.getTaskListsForHSIds().keySet())); SNAP_LOG.debug( "Created tasks for HSIds: " + CoreUtils.hsIdCollectionToString(m_taskListsForHSIds.keySet())); } }
@Override public void run() { List<String> lineList; // if header option is true, check whether csv first line is valid if (m_config.header) { if (!checkHeader()) { m_log.error( "In the CSV file " + m_config.file + ", the header " + m_listReader.getUntokenizedRow() + " does not match " + "an existing column in the table " + m_config.table + "."); System.exit(-1); } } while ((m_config.limitrows-- > 0)) { if (m_errHandler.hasReachedErrorLimit()) { break; } try { // Initial setting of m_totalLineCount if (m_listReader.getLineNumber() == 0) { m_totalLineCount.set(m_config.skip); } else { m_totalLineCount.set(m_listReader.getLineNumber()); } long st = System.nanoTime(); lineList = m_listReader.read(); long end = System.nanoTime(); m_parsingTime += (end - st); if (lineList == null) { if (m_totalLineCount.get() > m_listReader.getLineNumber()) { m_totalLineCount.set(m_listReader.getLineNumber()); } break; } m_totalRowCount.incrementAndGet(); if (lineList.isEmpty()) { continue; } String[] lineValues = lineList.toArray(new String[0]); String lineCheckResult; String[] reorderValues = new String[m_columnCount]; if ((lineCheckResult = checkparams_trimspace_reorder(lineValues, reorderValues)) != null) { final RowWithMetaData metaData = new RowWithMetaData(m_listReader.getUntokenizedRow(), m_totalLineCount.get() + 1); if (m_errHandler.handleError(metaData, null, lineCheckResult)) { break; } continue; } RowWithMetaData lineData = new RowWithMetaData(m_listReader.getUntokenizedRow(), m_listReader.getLineNumber()); m_loader.insertRow(lineData, reorderValues); } catch (SuperCsvException e) { // Catch rows that can not be read by superCSV m_listReader. // e.g. items without quotes when strictquotes is enabled. final RowWithMetaData metaData = new RowWithMetaData(m_listReader.getUntokenizedRow(), m_totalLineCount.get() + 1); if (m_errHandler.handleError(metaData, null, e.getMessage())) { break; } } catch (IOException ex) { m_log.error("Failed to read CSV line from file: " + ex); break; } catch (InterruptedException e) { m_log.error("CSVLoader interrupted: " + e); break; } } // Now wait for processors to see endOfData and count down. After that drain to finish all // callbacks try { m_log.debug("Waiting for CSVDataLoader to finish."); m_loader.close(); m_log.debug("CSVDataLoader Done."); } catch (Exception ex) { m_log.warn( "Stopped processing because of connection error. " + "A report will be generated with what we processed so far. Error: " + ex); } }
/** * The only public method: do all the work to start a snapshot. Assumes that a snapshot is * feasible, that the caller has validated it can be accomplished, that the caller knows this is a * consistent or useful transaction point at which to snapshot. * * @param file_path * @param file_nonce * @param format * @param block * @param txnId * @param data * @param context * @param hostname * @return VoltTable describing the results of the snapshot attempt */ public VoltTable startSnapshotting( final String file_path, final String pathType, final String file_nonce, final SnapshotFormat format, final byte block, final long multiPartTxnId, final long partitionTxnId, final long legacyPerPartitionTxnIds[], final String data, final SystemProcedureExecutionContext context, final String hostname, final HashinatorSnapshotData hashinatorData, final long timestamp) { TRACE_LOG.trace("Creating snapshot target and handing to EEs"); final VoltTable result = SnapshotUtil.constructNodeResultsTable(); final int numLocalSites = context.getCluster().getDeployment().get("deployment").getSitesperhost(); JSONObject jsData = null; if (data != null && !data.isEmpty()) { try { jsData = new JSONObject(data); } catch (JSONException e) { SNAP_LOG.error(String.format("JSON exception on snapshot data \"%s\".", data), e); } } final JSONObject finalJsData = jsData; JSONObject perSiteRemoteDataCenterDrIds; try { perSiteRemoteDataCenterDrIds = ExtensibleSnapshotDigestData.serializeSiteConsumerDrIdTrackersToJSON( context.getDrAppliedTrackers()); } catch (JSONException e) { SNAP_LOG.warn("Failed to serialize the Remote DataCenter's Last applied DRIds"); perSiteRemoteDataCenterDrIds = new JSONObject(); } // One site wins the race to create the snapshot targets, populating // m_taskListsForHSIds for the other sites and creating an appropriate // number of snapshot permits. synchronized (SnapshotSiteProcessor.m_snapshotCreateLock) { SnapshotSiteProcessor.m_snapshotCreateSetupBarrierActualAction.set( new Runnable() { @Override public void run() { Map<Integer, Long> partitionTransactionIds = m_partitionLastSeenTransactionIds; SNAP_LOG.debug("Last seen partition transaction ids " + partitionTransactionIds); m_partitionLastSeenTransactionIds = new HashMap<Integer, Long>(); partitionTransactionIds.put(TxnEgo.getPartitionId(multiPartTxnId), multiPartTxnId); Map<Integer, JSONObject> remoteDataCenterLastIds = m_remoteDataCenterLastIds; m_remoteDataCenterLastIds = new HashMap<Integer, JSONObject>(); /* * Do a quick sanity check that the provided IDs * don't conflict with currently active partitions. If they do * it isn't fatal we can just skip it. */ for (long txnId : legacyPerPartitionTxnIds) { final int legacyPartition = TxnEgo.getPartitionId(txnId); if (partitionTransactionIds.containsKey(legacyPartition)) { SNAP_LOG.warn( "While saving a snapshot and propagating legacy " + "transaction ids found an id that matches currently active partition" + partitionTransactionIds.get(legacyPartition)); } else { partitionTransactionIds.put(legacyPartition, txnId); } } m_allLocalSiteSnapshotDigestData = new ExtensibleSnapshotDigestData( SnapshotSiteProcessor.getExportSequenceNumbers(), SnapshotSiteProcessor.getDRTupleStreamStateInfo(), remoteDataCenterLastIds); createSetupIv2( file_path, pathType, file_nonce, format, multiPartTxnId, partitionTransactionIds, finalJsData, context, result, m_allLocalSiteSnapshotDigestData, context.getSiteTrackerForSnapshot(), hashinatorData, timestamp); } }); // Create a barrier to use with the current number of sites to wait for // or if the barrier is already set up check if it is broken and reset if necessary SnapshotSiteProcessor.readySnapshotSetupBarriers(numLocalSites); // From within this EE, record the sequence numbers as of the start of the snapshot (now) // so that the info can be put in the digest. SnapshotSiteProcessor.populateSequenceNumbersForExecutionSite(context); Integer partitionId = TxnEgo.getPartitionId(partitionTxnId); SNAP_LOG.debug( "Registering transaction id " + partitionTxnId + " for " + TxnEgo.getPartitionId(partitionTxnId)); m_partitionLastSeenTransactionIds.put(partitionId, partitionTxnId); m_remoteDataCenterLastIds.put(partitionId, perSiteRemoteDataCenterDrIds); } boolean runPostTasks = false; VoltTable earlyResultTable = null; try { SnapshotSiteProcessor.m_snapshotCreateSetupBarrier.await(); try { synchronized (m_createLock) { SNAP_LOG.debug( "Found tasks for HSIds: " + CoreUtils.hsIdCollectionToString(m_taskListsForHSIds.keySet())); SNAP_LOG.debug("Looking for local HSID: " + CoreUtils.hsIdToString(context.getSiteId())); Deque<SnapshotTableTask> taskList = m_taskListsForHSIds.remove(context.getSiteId()); // If createSetup failed, then the first site to reach here is going // to send the results table generated by createSetup, and then empty out the table. // All other sites to reach here will send the appropriate empty table. // If createSetup was a success but the taskList is null, then we'll use the block // switch to figure out what flavor of empty SnapshotSave result table to return. if (!m_createSuccess.get()) { // There shouldn't be any work for any site if we failed assert (m_taskListsForHSIds.isEmpty()); VoltTable finalresult = m_createResult.get(); if (finalresult != null) { m_createResult.set(null); earlyResultTable = finalresult; } else { // We returned a non-empty NodeResultsTable with the failures in it, // every other site needs to return a NodeResultsTable as well. earlyResultTable = SnapshotUtil.constructNodeResultsTable(); } } else if (taskList == null) { SNAP_LOG.debug("No task for this site, block " + block); // This node is participating in the snapshot but this site has nothing to do. // Send back an appropriate empty table based on the block flag if (block != 0) { runPostTasks = true; earlyResultTable = SnapshotUtil.constructPartitionResultsTable(); earlyResultTable.addRow( context.getHostId(), hostname, CoreUtils.getSiteIdFromHSId(context.getSiteId()), "SUCCESS", ""); } else { // If doing snapshot for only replicated table(s), earlyResultTable here // may not be empty even if the taskList of this site is null. // In that case, snapshot result is preserved by earlyResultTable. earlyResultTable = result; } } else { context .getSiteSnapshotConnection() .initiateSnapshots( format, taskList, multiPartTxnId, m_allLocalSiteSnapshotDigestData); } if (m_deferredSetupFuture != null && taskList != null) { // Add a listener to the deferred setup so that it can kick off the snapshot // task once the setup is done. m_deferredSetupFuture.addListener( new Runnable() { @Override public void run() { DeferredSnapshotSetup deferredSnapshotSetup = null; try { deferredSnapshotSetup = m_deferredSetupFuture.get(); } catch (Exception e) { // it doesn't throw } assert deferredSnapshotSetup != null; context .getSiteSnapshotConnection() .startSnapshotWithTargets( deferredSnapshotSetup.getPlan().getSnapshotDataTargets()); } }, CoreUtils.SAMETHREADEXECUTOR); } } } finally { SnapshotSiteProcessor.m_snapshotCreateFinishBarrier.await(120, TimeUnit.SECONDS); } } catch (TimeoutException e) { VoltDB.crashLocalVoltDB( "Timed out waiting 120 seconds for all threads to arrive and start snapshot", true, null); } catch (InterruptedException e) { result.addRow(context.getHostId(), hostname, "", "FAILURE", CoreUtils.throwableToString(e)); earlyResultTable = result; } catch (BrokenBarrierException e) { result.addRow(context.getHostId(), hostname, "", "FAILURE", CoreUtils.throwableToString(e)); earlyResultTable = result; } catch (IllegalArgumentException e) { result.addRow(context.getHostId(), hostname, "", "FAILURE", CoreUtils.throwableToString(e)); earlyResultTable = result; } // If earlyResultTable is set, return here if (earlyResultTable != null) { if (runPostTasks) { // Need to run post-snapshot tasks before finishing SnapshotSiteProcessor.runPostSnapshotTasks(context); } return earlyResultTable; } if (block != 0) { HashSet<Exception> failures = Sets.newHashSet(); String status = "SUCCESS"; String err = ""; try { // For blocking snapshot, propogate the error from deferred setup back to the client final DeferredSnapshotSetup deferredSnapshotSetup = m_deferredSetupFuture.get(); if (deferredSnapshotSetup != null && deferredSnapshotSetup.getError() != null) { status = "FAILURE"; err = deferredSnapshotSetup.getError().toString(); failures.add(deferredSnapshotSetup.getError()); } failures.addAll(context.getSiteSnapshotConnection().completeSnapshotWork()); SnapshotSiteProcessor.runPostSnapshotTasks(context); } catch (Exception e) { status = "FAILURE"; err = e.toString(); failures.add(e); } final VoltTable blockingResult = SnapshotUtil.constructPartitionResultsTable(); if (failures.isEmpty()) { blockingResult.addRow( context.getHostId(), hostname, CoreUtils.getSiteIdFromHSId(context.getSiteId()), status, err); } else { status = "FAILURE"; for (Exception e : failures) { err = e.toString(); } blockingResult.addRow( context.getHostId(), hostname, CoreUtils.getSiteIdFromHSId(context.getSiteId()), status, err); } return blockingResult; } return result; }
// Offer a new message to the repair log. This will truncate // the repairLog if the message includes a truncation hint. public void deliver(VoltMessage msg) { if (!m_isLeader && msg instanceof Iv2InitiateTaskMessage) { final Iv2InitiateTaskMessage m = (Iv2InitiateTaskMessage) msg; // We can't repair read only SP transactions. Just don't log them to the repair log. if (m.isReadOnly()) { return; } m_lastSpHandle = m.getSpHandle(); truncate(m.getTruncationHandle(), IS_SP); m_logSP.add(new Item(IS_SP, m, m.getSpHandle(), m.getTxnId())); } else if (msg instanceof FragmentTaskMessage) { final FragmentTaskMessage m = (FragmentTaskMessage) msg; // We can't repair read only SP transactions. Just don't log them to the repair log. if (m.isReadOnly()) { return; } truncate(m.getTruncationHandle(), IS_MP); // only log the first fragment of a procedure (and handle 1st case) if (m.getTxnId() > m_lastMpHandle || m_lastMpHandle == Long.MAX_VALUE) { m_logMP.add(new Item(IS_MP, m, m.getSpHandle(), m.getTxnId())); m_lastMpHandle = m.getTxnId(); m_lastSpHandle = m.getSpHandle(); } } else if (msg instanceof CompleteTransactionMessage) { // a CompleteTransactionMessage which indicates restart is not the end of the // transaction. We don't want to log it in the repair log. CompleteTransactionMessage ctm = (CompleteTransactionMessage) msg; // We can't repair read only SP transactions. Just don't log them to the repair log. // Restart transaction do not need to be repaired here, don't log them as well. if (ctm.isReadOnly() || ctm.isRestart()) { return; } truncate(ctm.getTruncationHandle(), IS_MP); m_logMP.add(new Item(IS_MP, ctm, ctm.getSpHandle(), ctm.getTxnId())); // Restore will send a complete transaction message with a lower mp transaction id because // the restore transaction precedes the loading of the right mp transaction id from the // snapshot // Hence Math.max m_lastMpHandle = Math.max(m_lastMpHandle, ctm.getTxnId()); m_lastSpHandle = ctm.getSpHandle(); } else if (msg instanceof DumpMessage) { String who = CoreUtils.hsIdToString(m_HSId); tmLog.warn( "Repair log dump for site: " + who + ", isLeader: " + m_isLeader + ", " + who + ": lastSpHandle: " + m_lastSpHandle + ", lastMpHandle: " + m_lastMpHandle); for (Iv2RepairLogResponseMessage il : contents(0l, false)) { tmLog.warn("[Repair log contents]" + who + ": msg: " + il); } } else if (msg instanceof RepairLogTruncationMessage) { final RepairLogTruncationMessage truncateMsg = (RepairLogTruncationMessage) msg; truncate(truncateMsg.getHandle(), IS_SP); } }