/**
 * Creates the sync vertex through {@code JobGraphUtils.createSync} and writes the number of
 * iterations and {@code ITERATION_ID} into its task configuration.
 *
 * @param jobGraph the job graph handed to {@code JobGraphUtils.createSync}
 * @param numIterations the iteration count stored in the vertex configuration
 * @param dop the value handed to {@code JobGraphUtils.createSync} as its second argument
 * @return the configured sync vertex
 */
private static JobOutputVertex createSync(JobGraph jobGraph, int numIterations, int dop) {
    final JobOutputVertex syncVertex = JobGraphUtils.createSync(jobGraph, dop);

    final TaskConfig config = new TaskConfig(syncVertex.getConfiguration());
    config.setNumberOfIterations(numIterations);
    config.setIterationId(ITERATION_ID);

    return syncVertex;
}
/**
 * Instantiates the given delimited input format, points it at {@code inPath}, stores it as the
 * stub of the mock environment's task configuration, and registers the task's inputs/outputs.
 *
 * @param inTask the input task to wire to the mock environment
 * @param stubClass the input format class; instantiated reflectively via {@code newInstance()}
 * @param inPath the file path set on the format and used to add input splits
 * @param delimiter the delimiter set on the format
 * @throws RuntimeException if the input format cannot be instantiated
 */
public void registerFileInputTask(
        AbstractInputTask<?> inTask,
        Class<? extends DelimitedInputFormat> stubClass,
        String inPath,
        String delimiter) {
    DelimitedInputFormat inputFormat;
    try {
        inputFormat = stubClass.newInstance();
    } catch (Throwable t) {
        throw new RuntimeException("Could not instantiate test input format.", t);
    }
    inputFormat.setFilePath(inPath);
    inputFormat.setDelimiter(delimiter);

    // hand the configured format instance to the task through its configuration
    final TaskConfig sourceConfig = new TaskConfig(this.mockEnv.getTaskConfiguration());
    final UserCodeObjectWrapper<DelimitedInputFormat> wrappedFormat =
            new UserCodeObjectWrapper<DelimitedInputFormat>(inputFormat);
    sourceConfig.setStubWrapper(wrappedFormat);

    this.inputSplitProvider.addInputSplits(inPath, 5);

    inTask.setEnvironment(this.mockEnv);
    if (inTask instanceof DataSourceTask<?>) {
        final ClassLoader userCodeLoader = getClass().getClassLoader();
        ((DataSourceTask<?>) inTask).setUserCodeClassLoader(userCodeLoader);
    }
    inTask.registerInputOutput();
}
/* * (non-Javadoc) * * @see eu.stratosphere.pact.runtime.task.AbstractPactTask#prepare() */ @Override public void prepare() throws Exception { final TaskConfig config = this.taskContext.getTaskConfig(); // set up memory and I/O parameters final long availableMemory = config.getMemorySize(); // test minimum memory requirements LocalStrategy ls = config.getLocalStrategy(); long strategyMinMem = 0; switch (ls) { case COMBININGSORT: strategyMinMem = MIN_REQUIRED_MEMORY; break; } if (availableMemory < strategyMinMem) { throw new RuntimeException( "The Combine task was initialized with too little memory for local strategy " + config.getLocalStrategy() + " : " + availableMemory + " bytes." + "Required is at least " + strategyMinMem + " bytes."); } // obtain the TaskManager's MemoryManager final MemoryManager memoryManager = this.taskContext.getMemoryManager(); final MutableObjectIterator<T> in = this.taskContext.getInput(0); this.serializer = this.taskContext.getInputSerializer(0); this.comparator = this.taskContext.getInputComparator(0); switch (ls) { // local strategy is COMBININGSORT // The Input is combined using a sort-merge strategy. Before spilling on disk, the data // volume is reduced using // the combine() method of the ReduceStub. // An iterator on the sorted, grouped, and combined pairs is created and returned case COMBININGSORT: input = new AsynchronousPartialSorter<T>( memoryManager, in, this.taskContext.getOwningNepheleTask(), this.serializer, this.comparator.duplicate(), availableMemory); break; // obtain and return a grouped iterator from the combining sort-merger default: throw new RuntimeException("Invalid local strategy provided for CombineTask."); } }
/**
 * Creates the "[Points]" input vertex, which reads '|'-delimited records with one
 * {@code IntValue} and three {@code DoubleValue} fields and forwards them through a chained
 * "Build points" collector-map task running {@code PointBuilder}.
 *
 * @param jobGraph the job graph the vertex is created in
 * @param pointsPath the path handed to {@code JobGraphUtils.createInput}
 * @param numSubTasks passed twice to {@code JobGraphUtils.createInput}
 * @param serializer the output serializer for both the source and the chained mapper
 * @return the configured input vertex
 */
private static JobInputVertex createPointsInput(
        JobGraph jobGraph,
        String pointsPath,
        int numSubTasks,
        TypeSerializerFactory<?> serializer) {
    @SuppressWarnings("unchecked")
    CsvInputFormat csvFormat =
            new CsvInputFormat(
                    '|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class);

    final JobInputVertex vertex =
            JobGraphUtils.createInput(
                    csvFormat, pointsPath, "[Points]", jobGraph, numSubTasks, numSubTasks);

    // configuration of the mapper that is chained to the source
    final TaskConfig mapperConfig = new TaskConfig(new Configuration());
    mapperConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
    mapperConfig.setStubWrapper(new UserCodeObjectWrapper<PointBuilder>(new PointBuilder()));
    mapperConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    mapperConfig.setOutputSerializer(serializer);

    // configuration of the source itself
    final TaskConfig sourceConfig = new TaskConfig(vertex.getConfiguration());
    sourceConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    sourceConfig.setOutputSerializer(serializer);
    sourceConfig.addChainedTask(ChainedCollectorMapDriver.class, mapperConfig, "Build points");

    return vertex;
}
@Test public void testFailingMapTask() { int keyCnt = 100; int valCnt = 20; try { // environment initEnvironment(3 * 1024 * 1024); addInput(new UniformPactRecordGenerator(keyCnt, valCnt, false), 0); addOutput(this.outList); // chained combine config { final TaskConfig combineConfig = new TaskConfig(new Configuration()); // input combineConfig.addInputToGroup(0); combineConfig.setInputSerializer(serFact, 0); // output combineConfig.addOutputShipStrategy(ShipStrategyType.FORWARD); combineConfig.setOutputSerializer(serFact); // driver combineConfig.setDriverStrategy(DriverStrategy.PARTIAL_GROUP); combineConfig.setDriverComparator(compFact, 0); combineConfig.setMemoryDriver(3 * 1024 * 1024); // udf combineConfig.setStubClass(MockFailingCombineStub.class); getTaskConfig().addChainedTask(ChainedCombineDriver.class, combineConfig, "combine"); } // chained map+combine { final RegularPactTask<GenericMapper<PactRecord, PactRecord>, PactRecord> testTask = new RegularPactTask<GenericMapper<PactRecord, PactRecord>, PactRecord>(); super.registerTask(testTask, MapDriver.class, MockMapStub.class); boolean stubFailed = false; try { testTask.invoke(); } catch (Exception e) { stubFailed = true; } Assert.assertTrue("Stub exception was not forwarded.", stubFailed); } } catch (Exception e) { e.printStackTrace(); Assert.fail(e.getMessage()); } }
@Test public void testMapTask() { final int keyCnt = 100; final int valCnt = 20; try { // environment initEnvironment(3 * 1024 * 1024); addInput(new UniformPactRecordGenerator(keyCnt, valCnt, false), 0); addOutput(this.outList); // chained combine config { final TaskConfig combineConfig = new TaskConfig(new Configuration()); // input combineConfig.addInputToGroup(0); combineConfig.setInputSerializer(serFact, 0); // output combineConfig.addOutputShipStrategy(ShipStrategyType.FORWARD); combineConfig.setOutputSerializer(serFact); // driver combineConfig.setDriverStrategy(DriverStrategy.PARTIAL_GROUP); combineConfig.setDriverComparator(compFact, 0); combineConfig.setMemoryDriver(3 * 1024 * 1024); // udf combineConfig.setStubClass(MockReduceStub.class); getTaskConfig().addChainedTask(ChainedCombineDriver.class, combineConfig, "combine"); } // chained map+combine { RegularPactTask<GenericMapper<PactRecord, PactRecord>, PactRecord> testTask = new RegularPactTask<GenericMapper<PactRecord, PactRecord>, PactRecord>(); registerTask(testTask, MapDriver.class, MockMapStub.class); try { testTask.invoke(); } catch (Exception e) { e.printStackTrace(); Assert.fail("Invoke method caused exception."); } } Assert.assertEquals(keyCnt, this.outList.size()); } catch (Exception e) { e.printStackTrace(); Assert.fail(e.getMessage()); } }
/**
 * Stores the given file output format class and output path in the mock environment's task
 * configuration, attaches the environment to the task, and registers its inputs/outputs.
 *
 * @param outTask the output task to wire to the mock environment
 * @param stubClass the output format class wrapped as the task's stub
 * @param outPath the value stored under {@code FileOutputFormat.FILE_PARAMETER_KEY}
 */
public void registerFileOutputTask(
        AbstractOutputTask outTask, Class<? extends FileOutputFormat> stubClass, String outPath) {
    final TaskConfig sinkConfig = new TaskConfig(this.mockEnv.getTaskConfiguration());
    final UserCodeClassWrapper<FileOutputFormat> wrappedFormat =
            new UserCodeClassWrapper<FileOutputFormat>(stubClass);
    sinkConfig.setStubWrapper(wrappedFormat);
    sinkConfig.setStubParameter(FileOutputFormat.FILE_PARAMETER_KEY, outPath);

    outTask.setEnvironment(this.mockEnv);
    if (outTask instanceof DataSinkTask<?>) {
        final ClassLoader userCodeLoader = getClass().getClassLoader();
        ((DataSinkTask<?>) outTask).setUserCodeClassLoader(userCodeLoader);
    }
    outTask.registerInputOutput();
}
/**
 * Stores the driver class and the stub class in the mock environment's task configuration,
 * attaches the environment to the task, and registers its inputs/outputs.
 *
 * @param task the task to wire to the mock environment
 * @param driver the driver class written to the task configuration
 * @param stubClass the user code class wrapped as the task's stub
 */
public void registerTask(
        AbstractTask task,
        @SuppressWarnings("rawtypes") Class<? extends PactDriver> driver,
        Class<? extends Stub> stubClass) {
    final TaskConfig taskConfig = new TaskConfig(this.mockEnv.getTaskConfiguration());
    taskConfig.setDriver(driver);

    final UserCodeClassWrapper<Stub> wrappedStub = new UserCodeClassWrapper<Stub>(stubClass);
    taskConfig.setStubWrapper(wrappedStub);

    task.setEnvironment(this.mockEnv);
    if (task instanceof RegularPactTask<?, ?>) {
        final ClassLoader userCodeLoader = getClass().getClassLoader();
        ((RegularPactTask<?, ?>) task).setUserCodeClassLoader(userCodeLoader);
    }
    task.registerInputOutput();
}
/**
 * Creates the "Input[Models]" vertex, which reads space-delimited records of four
 * {@code LongValue} fields and ships them with the BROADCAST strategy.
 *
 * @param jobGraph the job graph the vertex is created in
 * @param pointsPath the path handed to {@code JobGraphUtils.createInput}
 * @param numSubTasks passed twice to {@code JobGraphUtils.createInput}
 * @param serializer the output serializer of the vertex
 * @return the configured input vertex
 */
@SuppressWarnings("unchecked")
private static JobInputVertex createModelsInput(
        JobGraph jobGraph,
        String pointsPath,
        int numSubTasks,
        TypeSerializerFactory<?> serializer) {
    final CsvInputFormat csvFormat =
            new CsvInputFormat(
                    ' ', LongValue.class, LongValue.class, LongValue.class, LongValue.class);

    final JobInputVertex vertex =
            JobGraphUtils.createInput(
                    csvFormat, pointsPath, "Input[Models]", jobGraph, numSubTasks, numSubTasks);

    final TaskConfig sourceConfig = new TaskConfig(vertex.getConfiguration());
    sourceConfig.addOutputShipStrategy(ShipStrategyType.BROADCAST);
    sourceConfig.setOutputSerializer(serializer);

    return vertex;
}
/**
 * Creates the "Output" file sink vertex that writes its single input with a
 * {@code PointOutFormat} to {@code resultPath}.
 *
 * @param jobGraph the job graph the vertex is created in
 * @param resultPath the output file path set on the format
 * @param numSubTasks passed twice to {@code JobGraphUtils.createFileOutput}
 * @param serializer the serializer of input 0
 * @return the configured output vertex
 */
private static JobOutputVertex createOutput(
        JobGraph jobGraph,
        String resultPath,
        int numSubTasks,
        TypeSerializerFactory<?> serializer) {
    final JobOutputVertex sink =
            JobGraphUtils.createFileOutput(jobGraph, "Output", numSubTasks, numSubTasks);

    final TaskConfig sinkConfig = new TaskConfig(sink.getConfiguration());
    sinkConfig.addInputToGroup(0);
    sinkConfig.setInputSerializer(serializer, 0);

    final PointOutFormat pointFormat = new PointOutFormat();
    pointFormat.setOutputFilePath(new Path(resultPath));
    sinkConfig.setStubWrapper(new UserCodeObjectWrapper<PointOutFormat>(pointFormat));

    return sink;
}
/**
 * Creates the "Output" file sink vertex that writes its single input as CSV (record
 * delimiter "\n", field delimiter " ", three {@code LongValue} fields) to {@code resultPath}.
 *
 * @param jobGraph the job graph the vertex is created in
 * @param resultPath the output file path set on the format
 * @param numSubTasks passed twice to {@code JobGraphUtils.createFileOutput}
 * @param serializer the serializer of input 0
 * @return the configured output vertex
 */
private static JobOutputVertex createOutput(
        JobGraph jobGraph,
        String resultPath,
        int numSubTasks,
        TypeSerializerFactory<?> serializer) {
    final JobOutputVertex sink =
            JobGraphUtils.createFileOutput(jobGraph, "Output", numSubTasks, numSubTasks);

    final TaskConfig sinkConfig = new TaskConfig(sink.getConfiguration());
    sinkConfig.addInputToGroup(0);
    sinkConfig.setInputSerializer(serializer, 0);

    @SuppressWarnings("unchecked")
    CsvOutputFormat csvFormat =
            new CsvOutputFormat("\n", " ", LongValue.class, LongValue.class, LongValue.class);
    csvFormat.setOutputFilePath(new Path(resultPath));
    sinkConfig.setStubWrapper(new UserCodeObjectWrapper<CsvOutputFormat>(csvFormat));

    return sink;
}
/**
 * Adds the list as an output of the mock environment and records a FORWARD ship strategy and
 * the {@code PactRecord} serializer for it in the task configuration.
 *
 * @param output the list registered as an output with the mock environment
 */
public void addOutput(List<PactRecord> output) {
    this.mockEnv.addOutput(output);

    final TaskConfig taskConfig = new TaskConfig(this.mockEnv.getTaskConfiguration());
    taskConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    taskConfig.setOutputSerializer(PactRecordSerializerFactory.get());
}
/**
 * Adds the iterator as an input of the mock environment, assigns it to the given input group,
 * and records the {@code PactRecord} serializer for that group in the task configuration.
 *
 * @param input the iterator registered as an input with the mock environment
 * @param groupId the input group the new input is added to
 */
public void addInput(MutableObjectIterator<PactRecord> input, int groupId) {
    this.mockEnv.addInput(input);

    final TaskConfig taskConfig = new TaskConfig(this.mockEnv.getTaskConfiguration());
    taskConfig.addInputToGroup(groupId);
    taskConfig.setInputSerializer(PactRecordSerializerFactory.get(), groupId);
}
/**
 * Creates the "Map[DotProducts]" task vertex: a collector-map running {@code DotProducts}
 * with one regular input and one broadcast input named "models", forwarding its output.
 *
 * @param jobGraph the job graph the vertex is created in
 * @param numSubTasks passed twice to {@code JobGraphUtils.createTask}
 * @param serializer the serializer used for the input, the broadcast input and the output
 * @return the configured mapper vertex
 */
private static JobTaskVertex createMapper(
        JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> serializer) {
    final JobTaskVertex mapperVertex =
            JobGraphUtils.createTask(
                    RegularPactTask.class, "Map[DotProducts]", jobGraph, numSubTasks, numSubTasks);

    final TaskConfig mapperConfig = new TaskConfig(mapperVertex.getConfiguration());

    // the udf
    mapperConfig.setStubWrapper(new UserCodeClassWrapper<DotProducts>(DotProducts.class));

    // output
    mapperConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    mapperConfig.setOutputSerializer(serializer);

    // driver
    mapperConfig.setDriver(CollectorMapDriver.class);
    mapperConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);

    // regular input
    mapperConfig.addInputToGroup(0);
    mapperConfig.setInputLocalStrategy(0, LocalStrategy.NONE);
    mapperConfig.setInputSerializer(serializer, 0);

    // broadcast input
    mapperConfig.setBroadcastInputName("models", 0);
    mapperConfig.addBroadcastInputToGroup(0);
    mapperConfig.setBroadcastInputSerializer(serializer, 0);

    return mapperVertex;
}
private static JobTaskVertex createIterationHead( JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> serializer) { JobTaskVertex head = JobGraphUtils.createTask( IterationHeadPactTask.class, "Iteration Head", jobGraph, numSubTasks, numSubTasks); TaskConfig headConfig = new TaskConfig(head.getConfiguration()); headConfig.setIterationId(ITERATION_ID); // initial input / partial solution headConfig.addInputToGroup(0); headConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0); headConfig.setInputSerializer(serializer, 0); // back channel / iterations headConfig.setBackChannelMemory(MEMORY_PER_CONSUMER * JobGraphUtils.MEGABYTE); // output into iteration. broadcasting the centers headConfig.setOutputSerializer(serializer); headConfig.addOutputShipStrategy(ShipStrategyType.BROADCAST); // final output TaskConfig headFinalOutConfig = new TaskConfig(new Configuration()); headFinalOutConfig.setOutputSerializer(serializer); headFinalOutConfig.addOutputShipStrategy(ShipStrategyType.FORWARD); headConfig.setIterationHeadFinalOutputConfig(headFinalOutConfig); // the sync headConfig.setIterationHeadIndexOfSyncOutput(2); // the driver headConfig.setDriver(NoOpDriver.class); headConfig.setDriverStrategy(DriverStrategy.UNARY_NO_OP); return head; }
private static JobTaskVertex createMapper( JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> inputSerializer, TypeSerializerFactory<?> broadcastVarSerializer, TypeSerializerFactory<?> outputSerializer, TypeComparatorFactory<?> outputComparator) { JobTaskVertex mapper = JobGraphUtils.createTask( IterationIntermediatePactTask.class, "Map (Select nearest center)", jobGraph, numSubTasks, numSubTasks); TaskConfig intermediateConfig = new TaskConfig(mapper.getConfiguration()); intermediateConfig.setIterationId(ITERATION_ID); intermediateConfig.setDriver(CollectorMapDriver.class); intermediateConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP); intermediateConfig.addInputToGroup(0); intermediateConfig.setInputSerializer(inputSerializer, 0); intermediateConfig.setOutputSerializer(outputSerializer); intermediateConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH); intermediateConfig.setOutputComparator(outputComparator, 0); intermediateConfig.setBroadcastInputName("centers", 0); intermediateConfig.addBroadcastInputToGroup(0); intermediateConfig.setBroadcastInputSerializer(broadcastVarSerializer, 0); // the udf intermediateConfig.setStubWrapper( new UserCodeObjectWrapper<SelectNearestCenter>(new SelectNearestCenter())); return mapper; }
private static JobTaskVertex createReducer( JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> inputSerializer, TypeComparatorFactory<?> inputComparator, TypeSerializerFactory<?> outputSerializer) { // ---------------- the tail (co group) -------------------- JobTaskVertex tail = JobGraphUtils.createTask( IterationTailPactTask.class, "Reduce / Iteration Tail", jobGraph, numSubTasks, numSubTasks); TaskConfig tailConfig = new TaskConfig(tail.getConfiguration()); tailConfig.setIterationId(ITERATION_ID); tailConfig.setIsWorksetUpdate(); // inputs and driver tailConfig.setDriver(ReduceDriver.class); tailConfig.setDriverStrategy(DriverStrategy.SORTED_GROUP); tailConfig.addInputToGroup(0); tailConfig.setInputSerializer(inputSerializer, 0); tailConfig.setDriverComparator(inputComparator, 0); tailConfig.setInputLocalStrategy(0, LocalStrategy.SORT); tailConfig.setInputComparator(inputComparator, 0); tailConfig.setMemoryInput(0, MEMORY_PER_CONSUMER * JobGraphUtils.MEGABYTE); tailConfig.setFilehandlesInput(0, 128); tailConfig.setSpillingThresholdInput(0, 0.9f); // output tailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD); tailConfig.setOutputSerializer(outputSerializer); // the udf tailConfig.setStubWrapper( new UserCodeObjectWrapper<RecomputeClusterCenter>(new RecomputeClusterCenter())); return tail; }