public void restoreStateLazy() throws Exception { if (lazyRestoreState != null) { LOG.info("Restoring checkpointed state to task {}", getName()); try { final StreamOperator<?>[] allOperators = operatorChain.getAllOperators(); final StreamTaskState[] states = lazyRestoreState.getState(userClassLoader); // be GC friendly lazyRestoreState = null; for (int i = 0; i < states.length; i++) { StreamTaskState state = states[i]; StreamOperator<?> operator = allOperators[i]; if (state != null && operator != null) { LOG.debug("Task {} in chain ({}) has checkpointed state", i, getName()); operator.restoreState(state); } else if (operator != null) { LOG.debug("Task {} in chain ({}) does not have checkpointed state", i, getName()); } } } catch (Exception e) { throw new Exception("Could not restore checkpointed state to operators and functions", e); } } }
/**
 * Calls {@code dispose()} on every non-null operator of the chain, in array order.
 *
 * @throws Exception the first failure raised by an operator's {@code dispose()};
 *         operators after the failing one are not disposed by this method
 */
private void tryDisposeAllOperators() throws Exception {
    final StreamOperator<?>[] chainOperators = operatorChain.getAllOperators();
    for (int idx = 0; idx < chainOperators.length; idx++) {
        final StreamOperator<?> current = chainOperators[idx];
        if (current == null) {
            continue;
        }
        current.dispose();
    }
}
/**
 * Calls {@code open()} on every non-null operator of the chain, in array order.
 *
 * @throws Exception the first failure raised by an operator's {@code open()}
 */
private void openAllOperators() throws Exception {
    final StreamOperator<?>[] chainOperators = operatorChain.getAllOperators();
    for (int idx = 0; idx < chainOperators.length; idx++) {
        final StreamOperator<?> current = chainOperators[idx];
        if (current == null) {
            continue;
        }
        current.open();
    }
}
/**
 * Runs this operator: calls {@code clearAllInnerSinks()}, lets the input extender pass
 * its data through, delegates to the superclass {@code doWork()}, and finally lets the
 * output extender pass its data through.
 *
 * <p>NOTE(review): the statement order looks significant (inputs forwarded before the
 * superclass work, outputs forwarded after it) - confirm before reordering.
 *
 * @throws OperatorException declared by the overridden method
 */
@Override public void doWork() throws OperatorException { clearAllInnerSinks(); inputExtender.passDataThrough(); super.doWork(); outputExtender.passDataThrough(); }
@Override public final void registerInputOutput() throws Exception { LOG.debug("registerInputOutput for {}", getName()); boolean initializationCompleted = false; try { AccumulatorRegistry accumulatorRegistry = getEnvironment().getAccumulatorRegistry(); userClassLoader = getUserCodeClassLoader(); configuration = new StreamConfig(getTaskConfiguration()); accumulatorMap = accumulatorRegistry.getUserMap(); stateBackend = createStateBackend(); stateBackend.initializeForJob(getEnvironment().getJobID()); headOperator = configuration.getStreamOperator(userClassLoader); operatorChain = new OperatorChain<>(this, headOperator, accumulatorRegistry.getReadWriteReporter()); if (headOperator != null) { headOperator.setup(this, configuration, operatorChain.getChainEntryPoint()); } timerService = Executors.newSingleThreadScheduledExecutor( new DispatcherThreadFactory(TRIGGER_THREAD_GROUP, "Time Trigger for " + getName())); // task specific initialization init(); initializationCompleted = true; } finally { if (!initializationCompleted) { if (timerService != null) { timerService.shutdownNow(); } if (operatorChain != null) { operatorChain.releaseOutputs(); } } } }
/**
 * Best-effort disposal: calls {@code dispose()} on every non-null operator of the
 * chain. A failure in one operator is logged and does not stop the remaining
 * operators from being disposed; nothing is propagated to the caller.
 */
private void disposeAllOperators() {
    final StreamOperator<?>[] chainOperators = operatorChain.getAllOperators();
    for (int idx = 0; idx < chainOperators.length; idx++) {
        final StreamOperator<?> current = chainOperators[idx];
        if (current == null) {
            continue;
        }
        try {
            current.dispose();
        } catch (Throwable t) {
            LOG.error("Error during disposal of stream operator.", t);
        }
    }
}
private void closeAllOperators() throws Exception { // We need to close them first to last, since upstream operators in the chain might emit // elements in their close methods. StreamOperator<?>[] allOperators = operatorChain.getAllOperators(); for (int i = allOperators.length - 1; i >= 0; i--) { StreamOperator<?> operator = allOperators[i]; if (operator != null) { operator.close(); } } }
@Override public void triggerCheckpoint(long checkpointId, long timestamp) throws Exception { LOG.debug("Starting checkpoint {} on task {}", checkpointId, getName()); synchronized (lock) { if (isRunning) { // since both state checkpointing and downstream barrier emission occurs in this // lock scope, they are an atomic operation regardless of the order in which they occur // we immediately emit the checkpoint barriers, so the downstream operators can start // their checkpoint work as soon as possible operatorChain.broadcastCheckpointBarrier(checkpointId, timestamp); // now draw the state snapshot try { final StreamOperator<?>[] allOperators = operatorChain.getAllOperators(); final StreamTaskState[] states = new StreamTaskState[allOperators.length]; for (int i = 0; i < states.length; i++) { StreamOperator<?> operator = allOperators[i]; if (operator != null) { StreamTaskState state = operator.snapshotOperatorState(checkpointId, timestamp); states[i] = state.isEmpty() ? null : state; } } StreamTaskStateList allStates = new StreamTaskStateList(states); if (allStates.isEmpty()) { getEnvironment().acknowledgeCheckpoint(checkpointId); } else { getEnvironment().acknowledgeCheckpoint(checkpointId, allStates); } } catch (Exception e) { if (isRunning) { throw e; } } } } }
/**
 * Forwards the notification of a completed checkpoint to every non-null operator of
 * the chain. The notification is ignored (and only logged) when the task is not
 * running. All work happens while holding the checkpoint lock.
 *
 * @param checkpointId id of the checkpoint that completed
 * @throws Exception the first failure raised by an operator while being notified
 */
@Override
public void notifyCheckpointComplete(long checkpointId) throws Exception {
    synchronized (lock) {
        if (!isRunning) {
            LOG.debug(
                    "Ignoring notification of complete checkpoint for not-running task {}", getName());
            return;
        }

        LOG.debug("Notification of complete checkpoint for task {}", getName());

        final StreamOperator<?>[] chainOperators = operatorChain.getAllOperators();
        for (int idx = 0; idx < chainOperators.length; idx++) {
            final StreamOperator<?> current = chainOperators[idx];
            if (current != null) {
                current.notifyOfCompletedCheckpoint(checkpointId);
            }
        }
    }
}
public ExecutionUnit(OperatorChain enclosingOperator, String name) { this.name = name; this.enclosingOperator = enclosingOperator; innerInputPorts.addObserver(delegatingPortObserver, false); innerOutputPorts.addObserver(delegatingPortObserver, false); // innerInputPorts.addObserver(portObserver, false); // innerOutputPorts.addObserver(portObserver, false); int index = 0; do { char c = name.charAt(index); if (!(Character.isUpperCase(c) || Character.isDigit(c))) { // LogService.getRoot().warning("Process name does not follow naming conventions: "+name+" // (in "+enclosingOperator.getOperatorDescription().getName()+")"); LogService.getRoot() .log( Level.WARNING, "com.rapidminer.operator.ExecutionUnit.process_name_does_not_follow_name_conventions", new Object[] {name, enclosingOperator.getOperatorDescription().getName()}); } index = name.indexOf(' ', index) + 1; } while (index != 0); }
@Override public void notifyCheckpointComplete(long checkpointId) throws Exception { synchronized (lock) { if (isRunning) { LOG.debug("Notification of complete checkpoint for task {}", getName()); // We first notify the state backend if necessary if (stateBackend instanceof CheckpointNotifier) { ((CheckpointNotifier) stateBackend).notifyCheckpointComplete(checkpointId); } for (StreamOperator<?> operator : operatorChain.getAllOperators()) { if (operator != null) { operator.notifyOfCompletedCheckpoint(checkpointId); } } } else { LOG.debug( "Ignoring notification of complete checkpoint for not-running task {}", getName()); } } }
/**
 * Returns the stream outputs as reported by the operator chain.
 *
 * @return the array obtained from {@code operatorChain.getStreamOutputs()}
 */
public RecordWriterOutput<?>[] getStreamOutputs() {
    final RecordWriterOutput<?>[] chainOutputs = operatorChain.getStreamOutputs();
    return chainOutputs;
}
/**
 * Returns the output of the head of the chain, which is the chain entry point of the
 * operator chain.
 *
 * @return the value obtained from {@code operatorChain.getChainEntryPoint()}
 */
public Output<StreamRecord<OUT>> getHeadOutput() {
    final Output<StreamRecord<OUT>> entryPoint = operatorChain.getChainEntryPoint();
    return entryPoint;
}
@Override public final void invoke() throws Exception { LOG.debug("Invoking {}", getName()); boolean disposed = false; try { // first order of business is to ive operators back their state restoreStateLazy(); // we need to make sure that any triggers scheduled in open() cannot be // executed before all operators are opened synchronized (lock) { openAllOperators(); } // let the task do its work isRunning = true; run(); isRunning = false; if (LOG.isDebugEnabled()) { LOG.debug("Finished task {}", getName()); } // make sure no further checkpoint and notification actions happen. // we make sure that no other thread is currently in the locked scope before // we close the operators by trying to acquire the checkpoint scope lock // we also need to make sure that no triggers fire concurrently with the close logic synchronized (lock) { // this is part of the main logic, so if this fails, the task is considered failed closeAllOperators(); } // make sure all buffered data is flushed operatorChain.flushOutputs(); // make an attempt to dispose the operators such that failures in the dispose call // still let the computation fail tryDisposeAllOperators(); disposed = true; } finally { isRunning = false; timerService.shutdownNow(); // release the output resources. this method should never fail. if (operatorChain != null) { operatorChain.releaseOutputs(); } // we must! perform this cleanup try { cleanup(); } catch (Throwable t) { // catch and log the exception to not replace the original exception LOG.error("Error during cleanup of stream task."); } // if the operators were not disposed before, do a hard dispose if (!disposed) { disposeAllOperators(); } try { if (stateBackend != null) { stateBackend.close(); } } catch (Throwable t) { LOG.error("Error while closing the state backend", t); } } }
@Override @SuppressWarnings("unchecked,rawtypes") public boolean triggerCheckpoint(final long checkpointId, final long timestamp) throws Exception { LOG.debug("Starting checkpoint {} on task {}", checkpointId, getName()); synchronized (lock) { if (isRunning) { // since both state checkpointing and downstream barrier emission occurs in this // lock scope, they are an atomic operation regardless of the order in which they occur // we immediately emit the checkpoint barriers, so the downstream operators can start // their checkpoint work as soon as possible operatorChain.broadcastCheckpointBarrier(checkpointId, timestamp); // now draw the state snapshot try { final StreamOperator<?>[] allOperators = operatorChain.getAllOperators(); final StreamTaskState[] states = new StreamTaskState[allOperators.length]; boolean hasAsyncStates = false; for (int i = 0; i < states.length; i++) { StreamOperator<?> operator = allOperators[i]; if (operator != null) { StreamTaskState state = operator.snapshotOperatorState(checkpointId, timestamp); if (state.getOperatorState() instanceof AsynchronousStateHandle) { hasAsyncStates = true; } if (state.getFunctionState() instanceof AsynchronousStateHandle) { hasAsyncStates = true; } states[i] = state.isEmpty() ? 
null : state; } } StreamTaskStateList allStates = new StreamTaskStateList(states); if (allStates.isEmpty()) { getEnvironment().acknowledgeCheckpoint(checkpointId); } else if (!hasAsyncStates) { getEnvironment().acknowledgeCheckpoint(checkpointId, allStates); } else { // start a Thread that does the asynchronous materialization and // then sends the checkpoint acknowledge Thread checkpointThread = new Thread() { @Override public void run() { try { for (StreamTaskState state : states) { if (state != null) { if (state.getFunctionState() instanceof AsynchronousStateHandle) { AsynchronousStateHandle<?> asyncState = (AsynchronousStateHandle<?>) state.getFunctionState(); state.setFunctionState((StateHandle) asyncState.materialize()); } if (state.getOperatorState() instanceof AsynchronousStateHandle) { AsynchronousStateHandle<?> asyncState = (AsynchronousStateHandle<?>) state.getOperatorState(); state.setOperatorState((StateHandle) asyncState.materialize()); } } } StreamTaskStateList allStates = new StreamTaskStateList(states); getEnvironment().acknowledgeCheckpoint(checkpointId, allStates); } catch (Exception e) { LOG.error( "Caught exception while materializing asynchronous checkpoints.", e); if (asyncException == null) { asyncException = new AsynchronousException(e); } } asyncCheckpointThreads.remove(this); LOG.debug( "Finished asynchronous checkpoints for checkpoint {} on task {}", checkpointId, getName()); } }; asyncCheckpointThreads.add(checkpointThread); checkpointThread.start(); } } catch (Exception e) { if (isRunning) { throw e; } } return true; } else { return false; } } }