@SuppressWarnings({"unchecked", "rawtypes"}) static void addStormConfigToTopology(FlinkTopology topology, Map conf) throws ClassNotFoundException { if (conf != null) { ExecutionConfig flinkConfig = topology.getExecutionEnvironment().getConfig(); flinkConfig.setGlobalJobParameters(new StormConfig(conf)); // add all registered types to ExecutionConfig List<?> registeredClasses = (List<?>) conf.get(Config.TOPOLOGY_KRYO_REGISTER); if (registeredClasses != null) { for (Object klass : registeredClasses) { if (klass instanceof String) { flinkConfig.registerKryoType(Class.forName((String) klass)); } else { for (Entry<String, String> register : ((Map<String, String>) klass).entrySet()) { flinkConfig.registerTypeWithKryoSerializer( Class.forName(register.getKey()), (Class<? extends Serializer<?>>) Class.forName(register.getValue())); } } } } } }
@Override
public void prepare() {
    ExecutionConfig executionConfig = taskContext.getExecutionConfig();
    this.objectReuseEnabled = executionConfig.isObjectReuseEnabled();

    if (LOG.isDebugEnabled()) {
        LOG.debug("NoOpDriver object reuse: " + (this.objectReuseEnabled ? "ENABLED" : "DISABLED") + ".");
    }
}
@Parameterized.Parameters
public static Collection<Object[]> getConfigurations() throws FileNotFoundException, IOException {
    LinkedList<Object[]> configs = new LinkedList<Object[]>();

    ExecutionConfig withReuse = new ExecutionConfig();
    withReuse.enableObjectReuse();

    ExecutionConfig withoutReuse = new ExecutionConfig();
    withoutReuse.disableObjectReuse();

    Object[] a = { withoutReuse };
    configs.add(a);
    Object[] b = { withReuse };
    configs.add(b);

    return configs;
}
@Test
public void testJoinPlain() {
    final FlatJoinFunction<String, String, Integer> joiner = new FlatJoinFunction<String, String, Integer>() {

        @Override
        public void join(String first, String second, Collector<Integer> out) throws Exception {
            out.collect(first.length());
            out.collect(second.length());
        }
    };

    @SuppressWarnings({ "rawtypes", "unchecked" })
    JoinOperatorBase<String, String, Integer, FlatJoinFunction<String, String, Integer>> base =
            new JoinOperatorBase(joiner,
                    new BinaryOperatorInformation(BasicTypeInfo.STRING_TYPE_INFO,
                            BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO),
                    new int[0], new int[0], "TestJoiner");

    List<String> inputData1 = new ArrayList<String>(Arrays.asList("foo", "bar", "foobar"));
    List<String> inputData2 = new ArrayList<String>(Arrays.asList("foobar", "foo"));
    List<Integer> expected = new ArrayList<Integer>(Arrays.asList(3, 3, 6, 6));

    try {
        ExecutionConfig executionConfig = new ExecutionConfig();
        executionConfig.disableObjectReuse();
        List<Integer> resultSafe = base.executeOnCollections(inputData1, inputData2, null, executionConfig);
        executionConfig.enableObjectReuse();
        List<Integer> resultRegular = base.executeOnCollections(inputData1, inputData2, null, executionConfig);

        assertEquals(expected, resultSafe);
        assertEquals(expected, resultRegular);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
public void run(final Object lockingObject, final Output<StreamRecord<T>> collector) throws Exception {
    final ExecutionConfig executionConfig = getExecutionConfig();

    if (userFunction instanceof EventTimeSourceFunction) {
        ctx = new ManualWatermarkContext<T>(lockingObject, collector);
    } else if (executionConfig.getAutoWatermarkInterval() > 0) {
        ctx = new AutomaticWatermarkContext<T>(lockingObject, collector, executionConfig);
    } else if (executionConfig.areTimestampsEnabled()) {
        ctx = new NonWatermarkContext<T>(lockingObject, collector);
    } else {
        ctx = new NonTimestampContext<T>(lockingObject, collector);
    }

    userFunction.run(ctx);

    // This mainly emits a final +Inf Watermark, so that the watermark logic still works
    // when some sources finish before others do
    ctx.close();
}
public AutomaticWatermarkContext(
        Object lockingObjectParam,
        Output<StreamRecord<T>> outputParam,
        ExecutionConfig executionConfig) {
    this.lockingObject = lockingObjectParam;
    this.output = outputParam;
    this.reuse = new StreamRecord<T>(null);

    watermarkInterval = executionConfig.getAutoWatermarkInterval();

    scheduleExecutor = Executors.newScheduledThreadPool(1);

    watermarkTimer = scheduleExecutor.scheduleAtFixedRate(new Runnable() {
        @Override
        public void run() {
            long currentTime = System.currentTimeMillis();
            // align the watermarks across all machines. this will ensure that we
            // don't have watermarks that creep along at different intervals because
            // the machine clocks are out of sync
            long watermarkTime = currentTime - (currentTime % watermarkInterval);
            if (currentTime > watermarkTime && watermarkTime - lastWatermarkTime >= watermarkInterval) {
                synchronized (lockingObject) {
                    if (currentTime > watermarkTime && watermarkTime - lastWatermarkTime >= watermarkInterval) {
                        output.emitWatermark(new Watermark(watermarkTime));
                        lastWatermarkTime = watermarkTime;
                    }
                }
            }
        }
    }, 0, watermarkInterval, TimeUnit.MILLISECONDS);
}
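// Worked example (illustrative, not part of the original sources) of the alignment above:
// truncating the current time to a multiple of the watermark interval makes all parallel
// source instances pick the same watermark timestamp within one interval, even if their
// wall clocks drift slightly.
static long alignToWatermarkInterval(long currentTime, long watermarkInterval) {
    // e.g. currentTime = 1_000_250 ms, watermarkInterval = 200 ms -> 1_000_200 ms
    return currentTime - (currentTime % watermarkInterval);
}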
@Test
public void testJoinRich() {
    final AtomicBoolean opened = new AtomicBoolean(false);
    final AtomicBoolean closed = new AtomicBoolean(false);
    final String taskName = "Test rich join function";

    final RichFlatJoinFunction<String, String, Integer> joiner = new RichFlatJoinFunction<String, String, Integer>() {

        @Override
        public void open(Configuration parameters) throws Exception {
            opened.compareAndSet(false, true);
            assertEquals(0, getRuntimeContext().getIndexOfThisSubtask());
            assertEquals(1, getRuntimeContext().getNumberOfParallelSubtasks());
        }

        @Override
        public void close() throws Exception {
            closed.compareAndSet(false, true);
        }

        @Override
        public void join(String first, String second, Collector<Integer> out) throws Exception {
            out.collect(first.length());
            out.collect(second.length());
        }
    };

    JoinOperatorBase<String, String, Integer, RichFlatJoinFunction<String, String, Integer>> base =
            new JoinOperatorBase<String, String, Integer, RichFlatJoinFunction<String, String, Integer>>(joiner,
                    new BinaryOperatorInformation<String, String, Integer>(BasicTypeInfo.STRING_TYPE_INFO,
                            BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO),
                    new int[0], new int[0], taskName);

    final List<String> inputData1 = new ArrayList<String>(Arrays.asList("foo", "bar", "foobar"));
    final List<String> inputData2 = new ArrayList<String>(Arrays.asList("foobar", "foo"));
    final List<Integer> expected = new ArrayList<Integer>(Arrays.asList(3, 3, 6, 6));

    try {
        final HashMap<String, Accumulator<?, ?>> accumulatorMap = new HashMap<String, Accumulator<?, ?>>();
        final HashMap<String, Future<Path>> cpTasks = new HashMap<>();

        ExecutionConfig executionConfig = new ExecutionConfig();
        executionConfig.disableObjectReuse();
        List<Integer> resultSafe = base.executeOnCollections(inputData1, inputData2,
                new RuntimeUDFContext(taskName, 1, 0, null, executionConfig, cpTasks, accumulatorMap),
                executionConfig);
        executionConfig.enableObjectReuse();
        List<Integer> resultRegular = base.executeOnCollections(inputData1, inputData2,
                new RuntimeUDFContext(taskName, 1, 0, null, executionConfig, cpTasks, accumulatorMap),
                executionConfig);

        assertEquals(expected, resultSafe);
        assertEquals(expected, resultRegular);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    assertTrue(opened.get());
    assertTrue(closed.get());
}
@Override
public void invoke() throws Exception {
    if (LOG.isDebugEnabled()) {
        LOG.debug(getLogString("Starting data sink operator"));
    }

    ExecutionConfig executionConfig;
    try {
        ExecutionConfig c = (ExecutionConfig) InstantiationUtil.readObjectFromConfig(
                getJobConfiguration(), ExecutionConfig.CONFIG_KEY, getUserCodeClassLoader());
        if (c != null) {
            executionConfig = c;
        } else {
            LOG.warn("The execution config returned by the configuration was null");
            executionConfig = new ExecutionConfig();
        }
    } catch (IOException e) {
        throw new RuntimeException("Could not load ExecutionConfig from Job Configuration: " + e);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException("Could not load ExecutionConfig from Job Configuration: " + e);
    }
    boolean objectReuseEnabled = executionConfig.isObjectReuseEnabled();

    try {
        // initialize local strategies
        MutableObjectIterator<IT> input1;
        switch (this.config.getInputLocalStrategy(0)) {
        case NONE:
            // nothing to do
            localStrategy = null;
            input1 = reader;
            break;
        case SORT:
            // initialize sort local strategy
            try {
                // get type comparator
                TypeComparatorFactory<IT> compFact = this.config.getInputComparator(0, getUserCodeClassLoader());
                if (compFact == null) {
                    throw new Exception("Missing comparator factory for local strategy on input " + 0);
                }

                // initialize sorter
                UnilateralSortMerger<IT> sorter = new UnilateralSortMerger<IT>(
                        getEnvironment().getMemoryManager(), getEnvironment().getIOManager(),
                        this.reader, this, this.inputTypeSerializerFactory, compFact.createComparator(),
                        this.config.getRelativeMemoryInput(0), this.config.getFilehandlesInput(0),
                        this.config.getSpillingThresholdInput(0));

                this.localStrategy = sorter;
                input1 = sorter.getIterator();
            } catch (Exception e) {
                throw new RuntimeException("Initializing the input processing failed" +
                        (e.getMessage() == null ? "." : ": " + e.getMessage()), e);
            }
            break;
        default:
            throw new RuntimeException("Invalid local strategy for DataSinkTask");
        }

        // read from the input and write to the output format
        final TypeSerializer<IT> serializer = this.inputTypeSerializerFactory.getSerializer();
        final MutableObjectIterator<IT> input = input1;
        final OutputFormat<IT> format = this.format;

        // check if task has been canceled
        if (this.taskCanceled) {
            return;
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug(getLogString("Starting to produce output"));
        }

        // open
        format.open(this.getEnvironment().getIndexInSubtaskGroup(), this.getEnvironment().getNumberOfSubtasks());

        if (objectReuseEnabled) {
            IT record = serializer.createInstance();

            // work!
            while (!this.taskCanceled && ((record = input.next(record)) != null)) {
                format.writeRecord(record);
            }
        } else {
            IT record;

            // work!
            while (!this.taskCanceled && ((record = input.next()) != null)) {
                format.writeRecord(record);
            }
        }

        // close. We close here such that a regular close throwing an exception marks a task as failed.
        if (!this.taskCanceled) {
            this.format.close();
            this.format = null;
        }
    } catch (Exception ex) {
        // make a best effort to clean up
        try {
            if (!cleanupCalled && format instanceof CleanupWhenUnsuccessful) {
                cleanupCalled = true;
                ((CleanupWhenUnsuccessful) format).tryCleanupOnError();
            }
        } catch (Throwable t) {
            LOG.error("Cleanup on error failed.", t);
        }

        ex = ExceptionInChainedStubException.exceptionUnwrap(ex);

        if (ex instanceof CancelTaskException) {
            // forward canceling exception
            throw ex;
        }
        // drop, if the task was canceled
        else if (!this.taskCanceled) {
            if (LOG.isErrorEnabled()) {
                LOG.error(getLogString("Error in user code: " + ex.getMessage()), ex);
            }
            throw ex;
        }
    } finally {
        if (this.format != null) {
            // close format, if it has not been closed, yet.
            // This should only be the case if we had a previous error, or were canceled.
            try {
                this.format.close();
            } catch (Throwable t) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn(getLogString("Error closing the output format"), t);
                }
            }
        }

        // close local strategy if necessary
        if (localStrategy != null) {
            try {
                this.localStrategy.close();
            } catch (Throwable t) {
                LOG.error("Error closing local strategy", t);
            }
        }

        RegularPactTask.clearReaders(new MutableReader[] { inputReader });
    }

    if (!this.taskCanceled) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(getLogString("Finished data sink operator"));
        }
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug(getLogString("Data sink operator cancelled"));
        }
    }
}
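// Minimal sketch (an assumption, not from the original sources) of the object-reuse contract
// behind the two read loops above and the Reusing/NonReusing variants elsewhere in this
// section: with reuse enabled, next(reuse) may hand back the same instance on every call,
// so a record must be copied before it is retained; with reuse disabled, next() returns a
// fresh instance that is safe to keep.
static <T> List<T> collectAll(MutableObjectIterator<T> input, TypeSerializer<T> serializer,
        boolean objectReuseEnabled) throws Exception {
    List<T> result = new ArrayList<T>();
    if (objectReuseEnabled) {
        T reuse = serializer.createInstance();
        while ((reuse = input.next(reuse)) != null) {
            result.add(serializer.copy(reuse)); // copy, the instance is overwritten on the next call
        }
    } else {
        T record;
        while ((record = input.next()) != null) {
            result.add(record); // fresh instance per call, safe to retain
        }
    }
    return result;
}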
@Override
public void prepare() throws Exception {
    final TaskConfig config = this.taskContext.getTaskConfig();
    if (config.getDriverStrategy() != DriverStrategy.CO_GROUP) {
        throw new Exception("Unrecognized driver strategy for CoGroup driver: " + config.getDriverStrategy().name());
    }

    final MutableObjectIterator<IT1> in1 = this.taskContext.getInput(0);
    final MutableObjectIterator<IT2> in2 = this.taskContext.getInput(1);

    // get the key positions and types
    final TypeSerializer<IT1> serializer1 = this.taskContext.<IT1>getInputSerializer(0).getSerializer();
    final TypeSerializer<IT2> serializer2 = this.taskContext.<IT2>getInputSerializer(1).getSerializer();
    final TypeComparator<IT1> groupComparator1 = this.taskContext.getDriverComparator(0);
    final TypeComparator<IT2> groupComparator2 = this.taskContext.getDriverComparator(1);

    final TypePairComparatorFactory<IT1, IT2> pairComparatorFactory =
            config.getPairComparatorFactory(this.taskContext.getUserCodeClassLoader());
    if (pairComparatorFactory == null) {
        throw new Exception("Missing pair comparator factory for CoGroup driver");
    }

    ExecutionConfig executionConfig = taskContext.getExecutionConfig();
    this.objectReuseEnabled = executionConfig.isObjectReuseEnabled();

    if (LOG.isDebugEnabled()) {
        LOG.debug("CoGroupDriver object reuse: " + (this.objectReuseEnabled ? "ENABLED" : "DISABLED") + ".");
    }

    if (objectReuseEnabled) {
        // create CoGroupTaskIterator according to provided local strategy.
        this.coGroupIterator = new ReusingSortMergeCoGroupIterator<IT1, IT2>(
                in1, in2, serializer1, groupComparator1, serializer2, groupComparator2,
                pairComparatorFactory.createComparator12(groupComparator1, groupComparator2));
    } else {
        // create CoGroupTaskIterator according to provided local strategy.
        this.coGroupIterator = new NonReusingSortMergeCoGroupIterator<IT1, IT2>(
                in1, in2, serializer1, groupComparator1, serializer2, groupComparator2,
                pairComparatorFactory.createComparator12(groupComparator1, groupComparator2));
    }

    // open CoGroupTaskIterator - this triggers the sorting and blocks until the iterator is ready
    this.coGroupIterator.open();

    if (LOG.isDebugEnabled()) {
        LOG.debug(this.taskContext.formatLogString("CoGroup task iterator ready."));
    }
}