protected boolean storeCheckpoint(DispatcherState curState, Checkpoint cp, SCN winScn) throws IOException { boolean debugEnabled = _log.isDebugEnabled(); if (debugEnabled) _log.debug("About to store checkpoint"); boolean success = true; // processBatch - returns false ; then ConsumerCallbackResult callbackResult = getAsyncCallback().onCheckpoint(winScn); boolean persistCheckpoint = !ConsumerCallbackResult.isSkipCheckpoint(callbackResult) && ConsumerCallbackResult.isSuccess(callbackResult); if (persistCheckpoint) { if (null != getCheckpointPersistor()) { getCheckpointPersistor().storeCheckpointV3(getSubsciptionsList(), cp, _registrationId); ++_numCheckPoints; } curState.storeCheckpoint(cp, winScn); removeEvents(curState); if (debugEnabled) _log.debug("Checkpoint saved: " + cp.toString()); } else { if (debugEnabled) _log.debug("Checkpoint " + cp + " not saved as callback returned " + callbackResult); } return success; }
public static Checkpoint createOnlineConsumptionCheckpoint( long lastCompleteWindowScn, long lastEowTsNsecs, DispatcherState curState, DbusEvent event) { // TODO: What does this mean? "For online consumption ; this means that a complete event window // hasn't been read yet." // So until we have support from RelayPuller resuming from mid-window ; there is no point in // trying to save a parital window long windowScn = lastCompleteWindowScn; if (windowScn < 0) { if (event.isCheckpointMessage()) { // control event; then safe to set to sequence; useful when relayPuller writes checkpoint to // buffer to // be passed on to bootstrapPuller windowScn = event.sequence(); // TODO: According to DbusEventFactory.createCheckpointEvent, event,sequence() is always 0! // Is this even executed? If we send a checkpoint event from the relay, we could be screwed! } else { // there's no sufficient data: not a single window has been processed. windowScn = event.sequence() > 0 ? event.sequence() - 1 : 0; // TODO Can't do this math for timestamp. See DDSDBUS-3149 } } return Checkpoint.createOnlineConsumptionCheckpoint(windowScn, lastEowTsNsecs); }
/**
 * Dispatches buffered events from the current dispatcher state to the consumer callbacks.
 *
 * <p>Outline of what the body does:
 *
 * <ol>
 *   <li>If dispatching has not been stopped, no shutdown was requested and the events iterator
 *       has nothing to offer, waits up to 50 ms on the iterator.
 *   <li>Loops while the previous step succeeded, dispatching is not stopped, the state is not
 *       STOP_DISPATCH_EVENTS, the iterator is non-null and has events, no shutdown was requested
 *       and no other messages are queued. For every event it adds the event size to
 *       _currentWindowSizeInBytes, performs a rollback to the event's SCN when the state is
 *       flagged as SCN-regress, and reports the window to the callback stats when available.
 *   <li>Control events: an end-of-period marker closes the current source and window and stores
 *       a checkpoint when events were seen; for an empty window a checkpoint is stored only if
 *       the sequence is positive, otherwise a warning is logged. Error events go to
 *       processErrorEvent. Any other control event goes to processSysEvent, and a checkpoint
 *       message is stored only when its SCN is positive or the mode is BOOTSTRAP_SNAPSHOT.
 *   <li>Data events: marks events as seen, starts the event window and/or source as required by
 *       the current state, then calls processDataEvent. When the checkpoint threshold has been
 *       exceeded the pending batch is flushed and a checkpoint is stored.
 *   <li>After each successfully handled event, if the checkpoint threshold is (still) exceeded,
 *       the batch is flushed and removeEvents is called to guarantee progress.
 *   <li>After the loop, unless stopped or shutting down: flushes any queued data events, rolls
 *       back on failure, and re-enqueues the state so other messages can be processed.
 * </ol>
 *
 * <p>A SharedCheckpointException raised while storing a checkpoint makes the method return
 * immediately (the in-line comments label this case "shutdown"); the post-loop flush, rollback
 * and re-enqueue are skipped in that case.
 *
 * <p>NOTE(review): the initial wait dereferences curState.getEventsIterator() without the null
 * check that the loop condition applies - confirm the iterator cannot be null on entry.
 *
 * <p>NOTE(review): the empty-window branch calls _log.isDebugEnabled() again instead of reusing
 * the cached debugEnabled flag; presumably harmless, just inconsistent.
 */
protected void doDispatchEvents() { boolean debugEnabled = _log.isDebugEnabled(); boolean traceEnabled = _log.isTraceEnabled(); // need to remove eventually but for now I want to avoid a nasty diff final DispatcherState curState = _internalState; // DbusEventIterator eventIter = curState.getEventsIterator(); if (!_stopDispatch.get() && !curState.getEventsIterator().hasNext() && !checkForShutdownRequest()) { if (debugEnabled) _log.debug("Waiting for events"); curState.getEventsIterator().await(50, TimeUnit.MILLISECONDS); } boolean success = true; boolean hasQueuedEvents = false; while (success && !_stopDispatch.get() && curState.getStateId() != DispatcherState.StateId.STOP_DISPATCH_EVENTS && null != curState.getEventsIterator() && curState.getEventsIterator().hasNext() && !checkForShutdownRequest() && // exit the event processing loop if there are other queued notifications !hasMessages()) { DbusEventInternalReadable nextEvent = curState.getEventsIterator().next(); _currentWindowSizeInBytes += nextEvent.size(); if (traceEnabled) _log.trace("Got event:" + nextEvent); Long eventSrcId = (long) nextEvent.srcId(); if (curState.isSCNRegress()) { SingleSourceSCN scn = new SingleSourceSCN(nextEvent.physicalPartitionId(), nextEvent.sequence()); _log.info("We are regressing to SCN: " + scn); curState.switchToRollback(); doRollback(curState, scn, false, false); curState.setSCNRegress(false); curState.switchToExpectEventWindow(); } if (null != getAsyncCallback().getStats()) getAsyncCallback() .getStats() .registerWindowSeen(nextEvent.timestampInNanos(), nextEvent.sequence()); if (nextEvent.isControlMessage()) { // control event if (nextEvent.isEndOfPeriodMarker()) { if (curState.isEventsSeen()) { if (null != curState.getCurrentSource()) { curState.switchToEndStreamSource(); success = doEndStreamSource(curState); } SCN endWinScn = null; if (success) { _lastWindowScn = nextEvent.sequence(); _lastEowTsNsecs = nextEvent.timestampInNanos(); endWinScn = new 
SingleSourceSCN(nextEvent.physicalPartitionId(), _lastWindowScn); curState.switchToEndStreamEventWindow(endWinScn); success = doEndStreamEventWindow(curState); } if (success) { try { // end of period event Checkpoint cp = createCheckpoint(curState, nextEvent); success = doStoreCheckpoint(curState, nextEvent, cp, endWinScn); } catch (SharedCheckpointException e) { // shutdown return; } } } else { // empty window success = true; if (_log.isDebugEnabled()) { _log.debug("skipping empty window: " + nextEvent.sequence()); } // write a checkpoint; takes care of slow sources ; but skip storing the first control // eop with 0 scn if (nextEvent.sequence() > 0) { _lastWindowScn = nextEvent.sequence(); // the first window (startEvents()) can have a eop whose sequence() is non-zero but // timestamp 0 e.g. in chained relay . // The reason is that the eop's timestamp is the max timestamp of all data events seen // so far. if (nextEvent.timestampInNanos() > 0) { _lastEowTsNsecs = nextEvent.timestampInNanos(); } Checkpoint ckpt = createCheckpoint(curState, nextEvent); try { success = doStoreCheckpoint( curState, nextEvent, ckpt, new SingleSourceSCN(nextEvent.physicalPartitionId(), nextEvent.sequence())); } catch (SharedCheckpointException e) { // shutdown return; } } else { _log.warn("EOP with scn=" + nextEvent.sequence()); } } if (success) { curState.switchToExpectEventWindow(); // we have recovered from the error and it's not the dummy window if (nextEvent.sequence() > 0) { if (!getStatus().isRunningStatus()) getStatus().resume(); } } } else if (nextEvent.isErrorEvent()) { _log.info("Error event: " + nextEvent.sequence()); success = processErrorEvent(curState, nextEvent); } else { // control event success = processSysEvent(curState, nextEvent); if (success) { if (nextEvent.isCheckpointMessage()) { Checkpoint sysCheckpt = createCheckpoint(curState, nextEvent); try { long scn = sysCheckpt.getConsumptionMode() == DbusClientMode.ONLINE_CONSUMPTION ? 
nextEvent.sequence() : sysCheckpt.getBootstrapSinceScn(); // ensure that control event with 0 scn doesn't get saved unless it is during // snapshot of bootstrap if (scn > 0 || sysCheckpt.getConsumptionMode() == DbusClientMode.BOOTSTRAP_SNAPSHOT) { success = doStoreCheckpoint( curState, nextEvent, sysCheckpt, new SingleSourceSCN(nextEvent.physicalPartitionId(), scn)); } } catch (SharedCheckpointException e) { // shutdown return; } } } } } else { curState.setEventsSeen(true); // not a control event if (curState.getStateId().equals(StateId.EXPECT_EVENT_WINDOW) || curState.getStateId().equals(StateId.REPLAY_DATA_EVENTS)) { SCN startScn = new SingleSourceSCN(nextEvent.physicalPartitionId(), nextEvent.sequence()); curState.switchToStartStreamEventWindow(startScn); success = doStartStreamEventWindow(curState); if (success && (eventSrcId.longValue() >= 0)) { success = doCheckStartSource(curState, eventSrcId, new SchemaId(nextEvent.schemaId())); } } else { if (null != curState.getCurrentSource() && !eventSrcId.equals(curState.getCurrentSource().getId())) { curState.switchToEndStreamSource(); success = doEndStreamSource(curState); } if (success) { // Check if schemas of the source exist. // Also check if the exact schema id present in event exists in the client. 
This is // worthwhile if there's a // guarantee that the entire window is written with the same schemaId, which is the case // if the relay does not use a new schema // mid-window success = doCheckStartSource(curState, eventSrcId, new SchemaId(nextEvent.schemaId())); } } if (success) { // finally: process data event success = processDataEvent(curState, nextEvent); if (success) { hasQueuedEvents = true; if (hasCheckpointThresholdBeenExceeded()) { _log.info( "Attempting to checkpoint (only if the consumer callback for onCheckpoint returns SUCCESS), because " + getCurrentWindowSizeInBytes() + " bytes reached without checkpoint "); success = processDataEventsBatch(curState); if (success) { hasQueuedEvents = false; // checkpoint: for bootstrap it's the right checkpoint; that has been lazily created // by a checkpoint event // checkpoint: for relay: create a checkpoint that has the prevScn Checkpoint cp = createCheckpoint(curState, nextEvent); // DDSDBUS-1889 : scn for bootstrap is bootstrapSinceSCN // scn for online consumption is : currentWindow SCN lastScn = cp.getConsumptionMode() == DbusClientMode.ONLINE_CONSUMPTION ? 
curState.getStartWinScn() : new SingleSourceSCN( nextEvent.physicalPartitionId(), cp.getBootstrapSinceScn()); try { // Even if storeCheckpoint fails, we // should continue (hoping for the best) success = doStoreCheckpoint(curState, nextEvent, cp, lastScn); } catch (SharedCheckpointException e) { // shutdown return; } curState.switchToExpectStreamDataEvents(); if (!getStatus().isRunningStatus()) getStatus().resume(); } } } } } if (success) { // check if threshold has been exceeded for control events; // DDSDBUS-1776 // this condition will take care of cases where checkpoint // persistence failed or onCheckpoint returned false // and the buffer was still left with events, at this juncture // we clear the buffer to make progress at the risk // of not being able to rollback should an error be encountered // before next successful checkpoint if (hasCheckpointThresholdBeenExceeded()) { // drain events just in case it hasn't been drained before; mainly control events that are // not checkpoint events success = processDataEventsBatch(curState); if (success) { _log.warn( "Checkpoint not stored, but removing older events from buffer to guarantee progress (checkpoint threshold has" + " exceeded), consider checkpointing more frequently. Triggered on control-event=" + nextEvent.isControlMessage()); // guarantee progress: risk being unable to rollback by // removing events, but hope for the best removeEvents(curState); } } } } if (!_stopDispatch.get() && !checkForShutdownRequest()) { if (success) { if (hasQueuedEvents) { success = processDataEventsBatch(curState); if (!success) { _log.error("Unable to flush partial window"); } } if (debugEnabled) _log.debug("doDispatchEvents to " + curState.toString()); } if (!success) { curState.switchToRollback(); doRollback(curState); } enqueueMessage(curState); // loop around -- let any other messages be processed } }
/**
 * Handles a start-SCN request: resolves the requested sources, computes the start SCN for the
 * bootstrap and writes it to the response as a JSON-serialized string.
 *
 * <p>The start SCN is the minimum applier window SCN reported by {@link BootstrapSCNProcessor}
 * for the requested sources. When the processor decides that the snapshot phase can be bypassed,
 * the checkpoint's bootstrap-since SCN is returned instead.
 *
 * <p>The response body is always encoded as UTF-8 rather than with the platform default charset,
 * so the bytes on the wire do not depend on the JVM's configuration.
 *
 * <p>NOTE(review): the {@code RequestProcessingException}s thrown from the inner catch blocks are
 * caught again by the outer {@code catch (Exception)}, logged a second time and wrapped in
 * another {@code RequestProcessingException}. This is left as is because callers may depend on
 * the resulting cause chain - confirm before simplifying.
 *
 * @param request the incoming request; must carry the sources and checkpoint parameters
 * @return the same {@code request} instance, with the start SCN written to its response content
 * @throws IOException declared by the signature; presumably raised while parsing the checkpoint
 *     parameter (not verifiable from this method alone)
 * @throws RequestProcessingException if the bootstrap database is too old, a SQL error occurs
 *     while fetching the start SCN, or any other exception is raised while computing or writing
 *     the result
 */
@Override
protected DatabusRequest doProcess(DatabusRequest request)
    throws IOException, RequestProcessingException {
  BootstrapHttpStatsCollector bootstrapStatsCollector =
      _bootstrapServer.getBootstrapStatsCollector();
  long startTime = System.currentTimeMillis();

  String sources = request.getRequiredStringParam(SOURCES_PARAM);
  List<String> srcList = getSources(sources);

  Checkpoint ckpt = new Checkpoint(request.getRequiredStringParam(CHECKPOINT_PARAM));
  LOG.info("StartSCN requested for sources : (" + sources + "). CheckPoint is :" + ckpt);
  long sinceScn = ckpt.getBootstrapSinceScn();

  ObjectMapper mapper = new ObjectMapper();
  StringWriter out = new StringWriter(1024);
  long startSCN = -1;
  BootstrapSCNProcessor processor = null;

  try {
    processor =
        new BootstrapSCNProcessor(_config, _bootstrapServer.getInboundEventStatisticsCollector());
    List<SourceStatusInfo> srcStatusPairs = null;
    try {
      srcStatusPairs = processor.getSourceIdAndStatusFromName(srcList);
      startSCN = processor.getMinApplierWindowScn(sinceScn, srcStatusPairs);

      if (processor.shouldBypassSnapshot(sinceScn, startSCN, srcStatusPairs)) {
        // The client is close enough to skip the snapshot: hand back its own sinceSCN.
        LOG.info("Bootstrap Snapshot phase will be bypassed for startScn request :" + request);
        LOG.info(
            "Original startSCN is:" + startSCN + ", Setting startSCN to the sinceSCN:" + sinceScn);
        startSCN = sinceScn;
      }
    } catch (BootstrapDatabaseTooOldException tooOldException) {
      if (bootstrapStatsCollector != null) {
        bootstrapStatsCollector.registerErrStartSCN();
        bootstrapStatsCollector.registerErrDatabaseTooOld();
      }
      LOG.error("The bootstrap database is too old!", tooOldException);
      throw new RequestProcessingException(tooOldException);
    } catch (SQLException e) {
      if (bootstrapStatsCollector != null) {
        bootstrapStatsCollector.registerErrStartSCN();
        bootstrapStatsCollector.registerErrSqlException();
      }
      LOG.error("Error encountered while fetching startSCN from database.", e);
      throw new RequestProcessingException(e);
    }

    mapper.writeValue(out, String.valueOf(startSCN));
    // Pin the charset: String.getBytes() without arguments uses the platform default encoding.
    byte[] resultBytes = out.toString().getBytes(java.nio.charset.Charset.forName("UTF-8"));
    request.getResponseContent().write(ByteBuffer.wrap(resultBytes));
    LOG.info("startSCN: " + startSCN + " with server Info :" + _serverHostPort);
  } catch (Exception ex) {
    LOG.error("Got exception while calculating startSCN", ex);
    throw new RequestProcessingException(ex);
  } finally {
    // Always release the processor, whether or not the SCN could be computed.
    if (null != processor) {
      processor.shutdown();
    }
  }

  // Latency is only recorded for requests that completed successfully.
  if (bootstrapStatsCollector != null) {
    bootstrapStatsCollector.registerStartSCNReq(System.currentTimeMillis() - startTime);
  }
  return request;
}