/**
 * Prepares a file-reading input task for execution against the mock environment.
 *
 * <p>The input format is instantiated reflectively, given the file path and delimiter, and stored
 * as the stub wrapper in the mock environment's task configuration. The path is then registered
 * with the input split provider and the task is bound to the mock environment.
 *
 * @param inTask the input task to set up
 * @param stubClass the input format class; it is created through {@code newInstance()}
 * @param inPath path of the file to read
 * @param delimiter delimiter handed to the input format
 * @throws RuntimeException if the input format cannot be instantiated (the cause is preserved)
 */
public void registerFileInputTask(
        AbstractInputTask<?> inTask,
        Class<? extends DelimitedInputFormat> stubClass,
        String inPath,
        String delimiter) {

    final DelimitedInputFormat inputFormat;
    try {
        inputFormat = stubClass.newInstance();
    } catch (Throwable cause) {
        throw new RuntimeException("Could not instantiate test input format.", cause);
    }
    inputFormat.setFilePath(inPath);
    inputFormat.setDelimiter(delimiter);

    // The format instance itself (not just its class) is handed to the task configuration.
    final TaskConfig sourceConfig = new TaskConfig(this.mockEnv.getTaskConfiguration());
    sourceConfig.setStubWrapper(new UserCodeObjectWrapper<DelimitedInputFormat>(inputFormat));

    // NOTE(review): the literal 5 is presumably the number of splits to create - confirm.
    this.inputSplitProvider.addInputSplits(inPath, 5);

    inTask.setEnvironment(this.mockEnv);

    // Only data source tasks receive the class loader of this test class.
    if (inTask instanceof DataSourceTask<?>) {
        final DataSourceTask<?> sourceTask = (DataSourceTask<?>) inTask;
        sourceTask.setUserCodeClassLoader(getClass().getClassLoader());
    }

    inTask.registerInputOutput();
}
/**
 * Creates the task vertex named "Map[DotProducts]" and configures it.
 *
 * <p>The vertex runs {@code DotProducts} as user code through the collector map driver, forwards
 * its output, reads one regular input without a local strategy, and reads one broadcast input
 * registered under the name "models". The same serializer is used for the output, the regular
 * input, and the broadcast input.
 *
 * @param jobGraph the job graph the vertex is added to
 * @param numSubTasks value passed for both numeric arguments of {@code JobGraphUtils.createTask}
 * @param serializer serializer factory used for output, input 0, and broadcast input 0
 * @return the configured mapper vertex
 */
private static JobTaskVertex createMapper(
        JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> serializer) {

    final JobTaskVertex mapperVertex =
            JobGraphUtils.createTask(
                    RegularPactTask.class, "Map[DotProducts]", jobGraph, numSubTasks, numSubTasks);

    final TaskConfig mapperConfig = new TaskConfig(mapperVertex.getConfiguration());

    // user code and output side
    mapperConfig.setStubWrapper(new UserCodeClassWrapper<DotProducts>(DotProducts.class));
    mapperConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    mapperConfig.setOutputSerializer(serializer);

    // driver
    mapperConfig.setDriver(CollectorMapDriver.class);
    mapperConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);

    // regular input
    mapperConfig.addInputToGroup(0);
    mapperConfig.setInputLocalStrategy(0, LocalStrategy.NONE);
    mapperConfig.setInputSerializer(serializer, 0);

    // broadcast input
    mapperConfig.setBroadcastInputName("models", 0);
    mapperConfig.addBroadcastInputToGroup(0);
    mapperConfig.setBroadcastInputSerializer(serializer, 0);

    return mapperVertex;
}
/**
 * Creates the input vertex named "[Points]" that reads the points file.
 *
 * <p>The file is read with a {@code CsvInputFormat} constructed with {@code '|'} and the field
 * types {@code IntValue, DoubleValue, DoubleValue, DoubleValue}. A {@code PointBuilder} is chained
 * to the source as a collector map task named "Build points". Both the source and the chained
 * task forward their output and use the given serializer.
 *
 * @param jobGraph the job graph the vertex is added to
 * @param pointsPath path of the points file
 * @param numSubTasks value passed for both numeric arguments of {@code JobGraphUtils.createInput}
 * @param serializer serializer factory for the output of the source and of the chained mapper
 * @return the configured input vertex
 */
private static JobInputVertex createPointsInput(
        JobGraph jobGraph,
        String pointsPath,
        int numSubTasks,
        TypeSerializerFactory<?> serializer) {

    @SuppressWarnings("unchecked")
    CsvInputFormat csvFormat =
            new CsvInputFormat(
                    '|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class);

    final JobInputVertex sourceVertex =
            JobGraphUtils.createInput(
                    csvFormat, pointsPath, "[Points]", jobGraph, numSubTasks, numSubTasks);

    // output side of the source itself
    final TaskConfig sourceConfig = new TaskConfig(sourceVertex.getConfiguration());
    sourceConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    sourceConfig.setOutputSerializer(serializer);

    // the chained mapper gets its own, fresh configuration
    final TaskConfig pointBuilderConfig = new TaskConfig(new Configuration());
    pointBuilderConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
    pointBuilderConfig.setStubWrapper(new UserCodeObjectWrapper<PointBuilder>(new PointBuilder()));
    pointBuilderConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    pointBuilderConfig.setOutputSerializer(serializer);

    sourceConfig.addChainedTask(ChainedCollectorMapDriver.class, pointBuilderConfig, "Build points");

    return sourceVertex;
}
/**
 * Prepares a file-writing output task for execution against the mock environment.
 *
 * <p>The output format class is stored as the stub wrapper in the mock environment's task
 * configuration, and the output path is stored as a stub parameter under
 * {@code FileOutputFormat.FILE_PARAMETER_KEY}. The task is then bound to the mock environment.
 *
 * @param outTask the output task to set up
 * @param stubClass the output format class to wrap as user code
 * @param outPath path the output format should write to
 */
public void registerFileOutputTask(
        AbstractOutputTask outTask, Class<? extends FileOutputFormat> stubClass, String outPath) {

    final TaskConfig sinkConfig = new TaskConfig(this.mockEnv.getTaskConfiguration());
    sinkConfig.setStubWrapper(new UserCodeClassWrapper<FileOutputFormat>(stubClass));
    sinkConfig.setStubParameter(FileOutputFormat.FILE_PARAMETER_KEY, outPath);

    outTask.setEnvironment(this.mockEnv);

    // Only data sink tasks receive the class loader of this test class.
    if (outTask instanceof DataSinkTask<?>) {
        final DataSinkTask<?> sinkTask = (DataSinkTask<?>) outTask;
        sinkTask.setUserCodeClassLoader(getClass().getClassLoader());
    }

    outTask.registerInputOutput();
}
/**
 * Prepares a regular task for execution against the mock environment.
 *
 * <p>The driver class and the stub class are written into the mock environment's task
 * configuration before the task is bound to the mock environment.
 *
 * @param task the task to set up
 * @param driver the driver class to record in the task configuration
 * @param stubClass the user code class to wrap and record in the task configuration
 */
public void registerTask(
        AbstractTask task,
        @SuppressWarnings("rawtypes") Class<? extends PactDriver> driver,
        Class<? extends Stub> stubClass) {

    final TaskConfig taskConfig = new TaskConfig(this.mockEnv.getTaskConfiguration());
    taskConfig.setDriver(driver);
    taskConfig.setStubWrapper(new UserCodeClassWrapper<Stub>(stubClass));

    task.setEnvironment(this.mockEnv);

    // Only regular pact tasks receive the class loader of this test class.
    if (task instanceof RegularPactTask<?, ?>) {
        final RegularPactTask<?, ?> pactTask = (RegularPactTask<?, ?>) task;
        pactTask.setUserCodeClassLoader(getClass().getClassLoader());
    }

    task.registerInputOutput();
}
private static JobTaskVertex createReducer( JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> inputSerializer, TypeComparatorFactory<?> inputComparator, TypeSerializerFactory<?> outputSerializer) { // ---------------- the tail (co group) -------------------- JobTaskVertex tail = JobGraphUtils.createTask( IterationTailPactTask.class, "Reduce / Iteration Tail", jobGraph, numSubTasks, numSubTasks); TaskConfig tailConfig = new TaskConfig(tail.getConfiguration()); tailConfig.setIterationId(ITERATION_ID); tailConfig.setIsWorksetUpdate(); // inputs and driver tailConfig.setDriver(ReduceDriver.class); tailConfig.setDriverStrategy(DriverStrategy.SORTED_GROUP); tailConfig.addInputToGroup(0); tailConfig.setInputSerializer(inputSerializer, 0); tailConfig.setDriverComparator(inputComparator, 0); tailConfig.setInputLocalStrategy(0, LocalStrategy.SORT); tailConfig.setInputComparator(inputComparator, 0); tailConfig.setMemoryInput(0, MEMORY_PER_CONSUMER * JobGraphUtils.MEGABYTE); tailConfig.setFilehandlesInput(0, 128); tailConfig.setSpillingThresholdInput(0, 0.9f); // output tailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD); tailConfig.setOutputSerializer(outputSerializer); // the udf tailConfig.setStubWrapper( new UserCodeObjectWrapper<RecomputeClusterCenter>(new RecomputeClusterCenter())); return tail; }
/**
 * Creates the file output vertex named "Output" that writes with a {@code PointOutFormat}.
 *
 * <p>The vertex reads a single input using the given serializer. The output format is pointed at
 * {@code resultPath} and stored as the stub wrapper of the vertex.
 *
 * @param jobGraph the job graph the vertex is added to
 * @param resultPath path the output format writes to
 * @param numSubTasks value passed for both numeric arguments of
 *     {@code JobGraphUtils.createFileOutput}
 * @param serializer serializer factory for input 0
 * @return the configured output vertex
 */
private static JobOutputVertex createOutput(
        JobGraph jobGraph,
        String resultPath,
        int numSubTasks,
        TypeSerializerFactory<?> serializer) {

    final JobOutputVertex sinkVertex =
            JobGraphUtils.createFileOutput(jobGraph, "Output", numSubTasks, numSubTasks);

    final TaskConfig sinkConfig = new TaskConfig(sinkVertex.getConfiguration());
    sinkConfig.addInputToGroup(0);
    sinkConfig.setInputSerializer(serializer, 0);

    final PointOutFormat pointFormat = new PointOutFormat();
    pointFormat.setOutputFilePath(new Path(resultPath));
    sinkConfig.setStubWrapper(new UserCodeObjectWrapper<PointOutFormat>(pointFormat));

    return sinkVertex;
}
/**
 * Creates the file output vertex named "Output" that writes with a {@code CsvOutputFormat}.
 *
 * <p>The vertex reads a single input using the given serializer. The output format is constructed
 * with a newline string, a single-space string (presumably record and field delimiters), and three
 * {@code LongValue} field types; it is pointed at {@code resultPath} and stored as the stub
 * wrapper of the vertex.
 *
 * @param jobGraph the job graph the vertex is added to
 * @param resultPath path the output format writes to
 * @param numSubTasks value passed for both numeric arguments of
 *     {@code JobGraphUtils.createFileOutput}
 * @param serializer serializer factory for input 0
 * @return the configured output vertex
 */
private static JobOutputVertex createOutput(
        JobGraph jobGraph,
        String resultPath,
        int numSubTasks,
        TypeSerializerFactory<?> serializer) {

    final JobOutputVertex sinkVertex =
            JobGraphUtils.createFileOutput(jobGraph, "Output", numSubTasks, numSubTasks);

    final TaskConfig sinkConfig = new TaskConfig(sinkVertex.getConfiguration());
    sinkConfig.addInputToGroup(0);
    sinkConfig.setInputSerializer(serializer, 0);

    @SuppressWarnings("unchecked")
    CsvOutputFormat csvFormat =
            new CsvOutputFormat("\n", " ", LongValue.class, LongValue.class, LongValue.class);
    csvFormat.setOutputFilePath(new Path(resultPath));
    sinkConfig.setStubWrapper(new UserCodeObjectWrapper<CsvOutputFormat>(csvFormat));

    return sinkVertex;
}
private static JobTaskVertex createMapper( JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> inputSerializer, TypeSerializerFactory<?> broadcastVarSerializer, TypeSerializerFactory<?> outputSerializer, TypeComparatorFactory<?> outputComparator) { JobTaskVertex mapper = JobGraphUtils.createTask( IterationIntermediatePactTask.class, "Map (Select nearest center)", jobGraph, numSubTasks, numSubTasks); TaskConfig intermediateConfig = new TaskConfig(mapper.getConfiguration()); intermediateConfig.setIterationId(ITERATION_ID); intermediateConfig.setDriver(CollectorMapDriver.class); intermediateConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP); intermediateConfig.addInputToGroup(0); intermediateConfig.setInputSerializer(inputSerializer, 0); intermediateConfig.setOutputSerializer(outputSerializer); intermediateConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH); intermediateConfig.setOutputComparator(outputComparator, 0); intermediateConfig.setBroadcastInputName("centers", 0); intermediateConfig.addBroadcastInputToGroup(0); intermediateConfig.setBroadcastInputSerializer(broadcastVarSerializer, 0); // the udf intermediateConfig.setStubWrapper( new UserCodeObjectWrapper<SelectNearestCenter>(new SelectNearestCenter())); return mapper; }