/**
 * Creates the input vertex named "[Points]" and chains a collector-map task
 * ("Build points", running a {@code PointBuilder}) directly behind it.
 *
 * @param jobGraph    the job graph handed to {@code JobGraphUtils.createInput}
 * @param pointsPath  the path handed to {@code JobGraphUtils.createInput}
 * @param numSubTasks passed twice to {@code JobGraphUtils.createInput}
 *                    (presumably the parallelism settings - verify against that helper)
 * @param serializer  serializer factory used for the output of both the source
 *                    and the chained mapper
 * @return the configured input vertex
 */
private static JobInputVertex createPointsInput(
        JobGraph jobGraph, String pointsPath, int numSubTasks, TypeSerializerFactory<?> serializer) {

    // The format is constructed with '|' followed by one int and three double field types.
    @SuppressWarnings("unchecked")
    CsvInputFormat csvFormat = new CsvInputFormat(
            '|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class);

    JobInputVertex sourceVertex = JobGraphUtils.createInput(
            csvFormat, pointsPath, "[Points]", jobGraph, numSubTasks, numSubTasks);

    // Configuration of the source vertex itself.
    TaskConfig sourceConfig = new TaskConfig(sourceVertex.getConfiguration());
    sourceConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    sourceConfig.setOutputSerializer(serializer);

    // Stand-alone configuration for the mapper that is chained to the source.
    TaskConfig pointBuilderConfig = new TaskConfig(new Configuration());
    pointBuilderConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
    pointBuilderConfig.setStubWrapper(new UserCodeObjectWrapper<PointBuilder>(new PointBuilder()));
    pointBuilderConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    pointBuilderConfig.setOutputSerializer(serializer);

    sourceConfig.addChainedTask(ChainedCollectorMapDriver.class, pointBuilderConfig, "Build points");

    return sourceVertex;
}
/**
 * Creates the task vertex named "Map[DotProducts]", which runs the
 * {@code DotProducts} user code with a collector-map driver. The vertex has
 * one regular input (index 0) and one broadcast input registered under the
 * name "models".
 *
 * @param jobGraph    the job graph handed to {@code JobGraphUtils.createTask}
 * @param numSubTasks passed twice to {@code JobGraphUtils.createTask}
 *                    (presumably the parallelism settings - verify against that helper)
 * @param serializer  serializer factory used for the input, the broadcast
 *                    input, and the output alike
 * @return the configured task vertex
 */
private static JobTaskVertex createMapper(
        JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> serializer) {

    JobTaskVertex mapperVertex = JobGraphUtils.createTask(
            RegularPactTask.class, "Map[DotProducts]", jobGraph, numSubTasks, numSubTasks);

    TaskConfig mapperConfig = new TaskConfig(mapperVertex.getConfiguration());

    // user code and output
    mapperConfig.setStubWrapper(new UserCodeClassWrapper<DotProducts>(DotProducts.class));
    mapperConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    mapperConfig.setOutputSerializer(serializer);

    // driver
    mapperConfig.setDriver(CollectorMapDriver.class);
    mapperConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);

    // regular input 0, consumed without a local strategy
    mapperConfig.addInputToGroup(0);
    mapperConfig.setInputLocalStrategy(0, LocalStrategy.NONE);
    mapperConfig.setInputSerializer(serializer, 0);

    // broadcast input 0, registered as "models"
    mapperConfig.setBroadcastInputName("models", 0);
    mapperConfig.addBroadcastInputToGroup(0);
    mapperConfig.setBroadcastInputSerializer(serializer, 0);

    return mapperVertex;
}
@Test public void testFailingMapTask() { int keyCnt = 100; int valCnt = 20; try { // environment initEnvironment(3 * 1024 * 1024); addInput(new UniformPactRecordGenerator(keyCnt, valCnt, false), 0); addOutput(this.outList); // chained combine config { final TaskConfig combineConfig = new TaskConfig(new Configuration()); // input combineConfig.addInputToGroup(0); combineConfig.setInputSerializer(serFact, 0); // output combineConfig.addOutputShipStrategy(ShipStrategyType.FORWARD); combineConfig.setOutputSerializer(serFact); // driver combineConfig.setDriverStrategy(DriverStrategy.PARTIAL_GROUP); combineConfig.setDriverComparator(compFact, 0); combineConfig.setMemoryDriver(3 * 1024 * 1024); // udf combineConfig.setStubClass(MockFailingCombineStub.class); getTaskConfig().addChainedTask(ChainedCombineDriver.class, combineConfig, "combine"); } // chained map+combine { final RegularPactTask<GenericMapper<PactRecord, PactRecord>, PactRecord> testTask = new RegularPactTask<GenericMapper<PactRecord, PactRecord>, PactRecord>(); super.registerTask(testTask, MapDriver.class, MockMapStub.class); boolean stubFailed = false; try { testTask.invoke(); } catch (Exception e) { stubFailed = true; } Assert.assertTrue("Stub exception was not forwarded.", stubFailed); } } catch (Exception e) { e.printStackTrace(); Assert.fail(e.getMessage()); } }
@Test public void testMapTask() { final int keyCnt = 100; final int valCnt = 20; try { // environment initEnvironment(3 * 1024 * 1024); addInput(new UniformPactRecordGenerator(keyCnt, valCnt, false), 0); addOutput(this.outList); // chained combine config { final TaskConfig combineConfig = new TaskConfig(new Configuration()); // input combineConfig.addInputToGroup(0); combineConfig.setInputSerializer(serFact, 0); // output combineConfig.addOutputShipStrategy(ShipStrategyType.FORWARD); combineConfig.setOutputSerializer(serFact); // driver combineConfig.setDriverStrategy(DriverStrategy.PARTIAL_GROUP); combineConfig.setDriverComparator(compFact, 0); combineConfig.setMemoryDriver(3 * 1024 * 1024); // udf combineConfig.setStubClass(MockReduceStub.class); getTaskConfig().addChainedTask(ChainedCombineDriver.class, combineConfig, "combine"); } // chained map+combine { RegularPactTask<GenericMapper<PactRecord, PactRecord>, PactRecord> testTask = new RegularPactTask<GenericMapper<PactRecord, PactRecord>, PactRecord>(); registerTask(testTask, MapDriver.class, MockMapStub.class); try { testTask.invoke(); } catch (Exception e) { e.printStackTrace(); Assert.fail("Invoke method caused exception."); } } Assert.assertEquals(keyCnt, this.outList.size()); } catch (Exception e) { e.printStackTrace(); Assert.fail(e.getMessage()); } }
private static JobTaskVertex createIterationHead( JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> serializer) { JobTaskVertex head = JobGraphUtils.createTask( IterationHeadPactTask.class, "Iteration Head", jobGraph, numSubTasks, numSubTasks); TaskConfig headConfig = new TaskConfig(head.getConfiguration()); headConfig.setIterationId(ITERATION_ID); // initial input / partial solution headConfig.addInputToGroup(0); headConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0); headConfig.setInputSerializer(serializer, 0); // back channel / iterations headConfig.setBackChannelMemory(MEMORY_PER_CONSUMER * JobGraphUtils.MEGABYTE); // output into iteration. broadcasting the centers headConfig.setOutputSerializer(serializer); headConfig.addOutputShipStrategy(ShipStrategyType.BROADCAST); // final output TaskConfig headFinalOutConfig = new TaskConfig(new Configuration()); headFinalOutConfig.setOutputSerializer(serializer); headFinalOutConfig.addOutputShipStrategy(ShipStrategyType.FORWARD); headConfig.setIterationHeadFinalOutputConfig(headFinalOutConfig); // the sync headConfig.setIterationHeadIndexOfSyncOutput(2); // the driver headConfig.setDriver(NoOpDriver.class); headConfig.setDriverStrategy(DriverStrategy.UNARY_NO_OP); return head; }
/**
 * Creates the input vertex named "Input[Models]", whose output is broadcast.
 *
 * @param jobGraph    the job graph handed to {@code JobGraphUtils.createInput}
 * @param pointsPath  the path handed to {@code JobGraphUtils.createInput}
 * @param numSubTasks passed twice to {@code JobGraphUtils.createInput}
 *                    (presumably the parallelism settings - verify against that helper)
 * @param serializer  serializer factory used for the vertex output
 * @return the configured input vertex
 */
@SuppressWarnings("unchecked")
private static JobInputVertex createModelsInput(
        JobGraph jobGraph, String pointsPath, int numSubTasks, TypeSerializerFactory<?> serializer) {

    // The format is constructed with ' ' followed by four long field types.
    CsvInputFormat csvFormat = new CsvInputFormat(
            ' ', LongValue.class, LongValue.class, LongValue.class, LongValue.class);

    JobInputVertex sourceVertex = JobGraphUtils.createInput(
            csvFormat, pointsPath, "Input[Models]", jobGraph, numSubTasks, numSubTasks);

    TaskConfig sourceConfig = new TaskConfig(sourceVertex.getConfiguration());
    sourceConfig.addOutputShipStrategy(ShipStrategyType.BROADCAST);
    sourceConfig.setOutputSerializer(serializer);

    return sourceVertex;
}
private static JobTaskVertex createReducer( JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> inputSerializer, TypeComparatorFactory<?> inputComparator, TypeSerializerFactory<?> outputSerializer) { // ---------------- the tail (co group) -------------------- JobTaskVertex tail = JobGraphUtils.createTask( IterationTailPactTask.class, "Reduce / Iteration Tail", jobGraph, numSubTasks, numSubTasks); TaskConfig tailConfig = new TaskConfig(tail.getConfiguration()); tailConfig.setIterationId(ITERATION_ID); tailConfig.setIsWorksetUpdate(); // inputs and driver tailConfig.setDriver(ReduceDriver.class); tailConfig.setDriverStrategy(DriverStrategy.SORTED_GROUP); tailConfig.addInputToGroup(0); tailConfig.setInputSerializer(inputSerializer, 0); tailConfig.setDriverComparator(inputComparator, 0); tailConfig.setInputLocalStrategy(0, LocalStrategy.SORT); tailConfig.setInputComparator(inputComparator, 0); tailConfig.setMemoryInput(0, MEMORY_PER_CONSUMER * JobGraphUtils.MEGABYTE); tailConfig.setFilehandlesInput(0, 128); tailConfig.setSpillingThresholdInput(0, 0.9f); // output tailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD); tailConfig.setOutputSerializer(outputSerializer); // the udf tailConfig.setStubWrapper( new UserCodeObjectWrapper<RecomputeClusterCenter>(new RecomputeClusterCenter())); return tail; }
private static JobTaskVertex createMapper( JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> inputSerializer, TypeSerializerFactory<?> broadcastVarSerializer, TypeSerializerFactory<?> outputSerializer, TypeComparatorFactory<?> outputComparator) { JobTaskVertex mapper = JobGraphUtils.createTask( IterationIntermediatePactTask.class, "Map (Select nearest center)", jobGraph, numSubTasks, numSubTasks); TaskConfig intermediateConfig = new TaskConfig(mapper.getConfiguration()); intermediateConfig.setIterationId(ITERATION_ID); intermediateConfig.setDriver(CollectorMapDriver.class); intermediateConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP); intermediateConfig.addInputToGroup(0); intermediateConfig.setInputSerializer(inputSerializer, 0); intermediateConfig.setOutputSerializer(outputSerializer); intermediateConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH); intermediateConfig.setOutputComparator(outputComparator, 0); intermediateConfig.setBroadcastInputName("centers", 0); intermediateConfig.addBroadcastInputToGroup(0); intermediateConfig.setBroadcastInputSerializer(broadcastVarSerializer, 0); // the udf intermediateConfig.setStubWrapper( new UserCodeObjectWrapper<SelectNearestCenter>(new SelectNearestCenter())); return mapper; }
/**
 * Registers the given list as an output of the mock environment and adds a
 * matching output entry (forward ship strategy, {@code PactRecord} serializer)
 * to the environment's task configuration.
 *
 * @param output the list handed to the mock environment as output
 */
public void addOutput(List<PactRecord> output) {
    this.mockEnv.addOutput(output);

    final TaskConfig taskConfig = new TaskConfig(this.mockEnv.getTaskConfiguration());
    taskConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    taskConfig.setOutputSerializer(PactRecordSerializerFactory.get());
}