public static Map<JobVertexID, ExecutionJobVertex> includeLegacyJobVertexIDs(
        Map<JobVertexID, ExecutionJobVertex> tasks) {

    Map<JobVertexID, ExecutionJobVertex> expanded = new HashMap<>(2 * tasks.size());
    // first include all new ids
    expanded.putAll(tasks);

    // now expand and add legacy ids
    for (ExecutionJobVertex executionJobVertex : tasks.values()) {
        if (null != executionJobVertex) {
            JobVertex jobVertex = executionJobVertex.getJobVertex();
            if (null != jobVertex) {
                List<JobVertexID> alternativeIds = jobVertex.getIdAlternatives();
                for (JobVertexID jobVertexID : alternativeIds) {
                    ExecutionJobVertex old = expanded.put(jobVertexID, executionJobVertex);
                    Preconditions.checkState(
                            null == old || old.equals(executionJobVertex),
                            "Ambiguous jobvertex id detected during expansion to legacy ids.");
                }
            }
        }
    }

    return expanded;
}
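// A minimal usage sketch (not part of the original code): "executionGraph" and
// "someVertex" are assumed names for an ExecutionGraph and one of its vertices.
// After expansion, a legacy (alternative) ID resolves to the same
// ExecutionJobVertex instance as the current ID.
Map<JobVertexID, ExecutionJobVertex> expandedTasks =
        includeLegacyJobVertexIDs(executionGraph.getAllVertices());

JobVertexID legacyId = someVertex.getJobVertex().getIdAlternatives().get(0);
assert expandedTasks.get(legacyId) == expandedTasks.get(someVertex.getJobVertexId());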
/**
 * Verifies that a correct error message is reported when vertices with master-side
 * initialization (input formats / output formats) fail.
 */
@Test
public void testFailureWhenInitializeOnMasterFails() {
    try {
        // create a simple job graph
        JobVertex jobVertex =
                new JobVertex("Vertex that fails in initializeOnMaster") {

                    @Override
                    public void initializeOnMaster(ClassLoader loader) throws Exception {
                        throw new RuntimeException("test exception");
                    }
                };

        jobVertex.setInvokableClass(Tasks.NoOpInvokable.class);
        JobGraph jg = new JobGraph("test job", jobVertex);

        // submit the job
        Future<Object> submitFuture =
                jmGateway.ask(
                        new JobManagerMessages.SubmitJob(jg, ListeningBehaviour.EXECUTION_RESULT),
                        timeout);
        try {
            Await.result(submitFuture, timeout);
        } catch (JobExecutionException e) {
            // that is what we expect
            // test that the exception nesting is not too deep
            assertTrue(e.getCause() instanceof RuntimeException);
        } catch (Exception e) {
            fail("Wrong exception type");
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
private JobGraph createTestJobGraph(
        String jobName, int senderParallelism, int receiverParallelism) {

    // The sender and receiver invokable logic ensures that each subtask gets the expected data
    final JobVertex sender = new JobVertex("Sender");
    sender.setInvokableClass(RoundRobinSubtaskIndexSender.class);
    sender.getConfiguration()
            .setInteger(RoundRobinSubtaskIndexSender.CONFIG_KEY, receiverParallelism);
    sender.setParallelism(senderParallelism);

    final JobVertex receiver = new JobVertex("Receiver");
    receiver.setInvokableClass(SubtaskIndexReceiver.class);
    receiver.getConfiguration().setInteger(SubtaskIndexReceiver.CONFIG_KEY, senderParallelism);
    receiver.setParallelism(receiverParallelism);

    receiver.connectNewDataSetAsInput(
            sender, DistributionPattern.ALL_TO_ALL, ResultPartitionType.BLOCKING);

    final JobGraph jobGraph = new JobGraph(jobName, sender, receiver);

    // We need to allow queued scheduling, because there are not enough slots available
    // to run all tasks at once. We queue tasks and then let them finish/consume the blocking
    // result one after the other.
    jobGraph.setAllowQueuedScheduling(true);

    return jobGraph;
}
@Test
public void testFailureWhenJarBlobsMissing() {
    try {
        // create a simple job graph
        JobVertex jobVertex = new JobVertex("Test Vertex");
        jobVertex.setInvokableClass(Tasks.NoOpInvokable.class);
        JobGraph jg = new JobGraph("test job", jobVertex);

        // request the blob port from the job manager
        Future<Object> future =
                jmGateway.ask(JobManagerMessages.getRequestBlobManagerPort(), timeout);
        int blobPort = (Integer) Await.result(future, timeout);

        // upload two dummy byte arrays and add their keys to the job graph as dependencies
        BlobKey key1, key2;
        BlobClient bc = new BlobClient(new InetSocketAddress("localhost", blobPort));
        try {
            key1 = bc.put(new byte[10]);
            key2 = bc.put(new byte[10]);

            // delete one of the blobs so that the job submission fails
            bc.delete(key2);
        } finally {
            bc.close();
        }

        jg.addBlob(key1);
        jg.addBlob(key2);

        // submit the job
        Future<Object> submitFuture =
                jmGateway.ask(
                        new JobManagerMessages.SubmitJob(jg, ListeningBehaviour.EXECUTION_RESULT),
                        timeout);
        try {
            Await.result(submitFuture, timeout);
        } catch (JobExecutionException e) {
            // that is what we expect
            assertTrue(e.getCause() instanceof IOException);
        } catch (Exception e) {
            fail("Wrong exception type");
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
private Map<ExecutionAttemptID, Execution> setupExecution(
        JobVertex v1, int dop1, JobVertex v2, int dop2) throws Exception {

    final JobID jobId = new JobID();

    v1.setParallelism(dop1);
    v2.setParallelism(dop2);

    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);

    // execution graph that executes actions synchronously
    ExecutionGraph eg =
            new ExecutionGraph(
                    TestingUtils.directExecutionContext(),
                    jobId,
                    "some job",
                    new Configuration(),
                    new SerializedValue<>(new ExecutionConfig()),
                    AkkaUtils.getDefaultTimeout(),
                    new NoRestartStrategy());
    eg.setQueuedSchedulingAllowed(false);

    List<JobVertex> ordered = Arrays.asList(v1, v2);
    eg.attachJobGraph(ordered);

    Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    for (int i = 0; i < dop1 + dop2; i++) {
        scheduler.newInstanceAvailable(
                ExecutionGraphTestUtils.getInstance(
                        new ExecutionGraphTestUtils.SimpleActorGateway(
                                TestingUtils.directExecutionContext())));
    }
    assertEquals(dop1 + dop2, scheduler.getNumberOfAvailableSlots());

    // schedule, this triggers mock deployment
    eg.scheduleForExecution(scheduler);

    Map<ExecutionAttemptID, Execution> executions = eg.getRegisteredExecutions();
    assertEquals(dop1 + dop2, executions.size());

    return executions;
}
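// A hedged usage sketch (not part of the original test class): set up two vertices
// and drive every deployed execution attempt to FINISHED. Execution.markFinished()
// is assumed to be callable here; the parallelism values are arbitrary.
Map<ExecutionAttemptID, Execution> executions =
        setupExecution(new JobVertex("v1"), 7, new JobVertex("v2"), 6);

for (Execution execution : new ArrayList<>(executions.values())) {
    execution.markFinished();
}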
public void connectToPredecessors(
        Map<IntermediateDataSetID, IntermediateResult> intermediateDataSets) throws JobException {

    List<JobEdge> inputs = jobVertex.getInputs();

    if (LOG.isDebugEnabled()) {
        LOG.debug(
                String.format(
                        "Connecting ExecutionJobVertex %s (%s) to %d predecessors.",
                        jobVertex.getID(), jobVertex.getName(), inputs.size()));
    }

    for (int num = 0; num < inputs.size(); num++) {
        JobEdge edge = inputs.get(num);

        if (LOG.isDebugEnabled()) {
            if (edge.getSource() == null) {
                LOG.debug(
                        String.format(
                                "Connecting input %d of vertex %s (%s) to intermediate result referenced via ID %s.",
                                num, jobVertex.getID(), jobVertex.getName(), edge.getSourceId()));
            } else {
                LOG.debug(
                        String.format(
                                "Connecting input %d of vertex %s (%s) to intermediate result referenced via predecessor %s (%s).",
                                num,
                                jobVertex.getID(),
                                jobVertex.getName(),
                                edge.getSource().getProducer().getID(),
                                edge.getSource().getProducer().getName()));
            }
        }

        // fetch the intermediate result via its ID. if it does not exist, then it either
        // has not been created, or the order in which this method is called for the job
        // vertices is not a topological order
        IntermediateResult ires = intermediateDataSets.get(edge.getSourceId());
        if (ires == null) {
            throw new JobException(
                    "Cannot connect this job graph to the previous graph. No previous intermediate result found for ID "
                            + edge.getSourceId());
        }

        this.inputs.add(ires);

        int consumerIndex = ires.registerConsumer();

        for (int i = 0; i < parallelism; i++) {
            ExecutionVertex ev = taskVertices[i];
            ev.connectSource(num, ires, edge, consumerIndex);
        }
    }
}
public JobGraph createBlockingJob(int parallelism) {
    Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(true);

    JobVertex sender = new JobVertex("sender");
    JobVertex receiver = new JobVertex("receiver");

    sender.setInvokableClass(Tasks.Sender.class);
    receiver.setInvokableClass(Tasks.BlockingOnceReceiver.class);

    sender.setParallelism(parallelism);
    receiver.setParallelism(parallelism);

    receiver.connectNewDataSetAsInput(sender, DistributionPattern.POINTWISE);

    SlotSharingGroup slotSharingGroup = new SlotSharingGroup();
    sender.setSlotSharingGroup(slotSharingGroup);
    receiver.setSlotSharingGroup(slotSharingGroup);

    return new JobGraph("Blocking test job", sender, receiver);
}
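// A hedged driver sketch (not part of the original code): submit the blocking job
// via an assumed JobManager gateway, then clear the flag so the receiver can
// finish once the scenario under test has played out. The message types mirror
// the submission code used elsewhere in this section.
JobGraph jobGraph = createBlockingJob(2);
jmGateway.tell(new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED));

// ... exercise the failure/recovery behavior under test, then unblock:
Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(false);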
public void resetForNewExecution() {
    if (!(numSubtasksInFinalState == 0 || numSubtasksInFinalState == parallelism)) {
        throw new IllegalStateException("Cannot reset vertex that is not in final state");
    }

    synchronized (stateMonitor) {
        // check and reset the sharing groups with scheduler hints
        if (slotSharingGroup != null) {
            slotSharingGroup.clearTaskAssignment();
        }

        // reset vertices one by one. if one reset fails, the "vertices in final state"
        // fields will be consistent to handle triggered cancel calls
        for (int i = 0; i < parallelism; i++) {
            taskVertices[i].resetForNewExecution();
            if (finishedSubtasks[i]) {
                finishedSubtasks[i] = false;
                numSubtasksInFinalState--;
            }
        }

        if (numSubtasksInFinalState != 0) {
            throw new RuntimeException("Bug: resetting the execution job vertex failed.");
        }

        // set up the input splits again
        try {
            if (this.inputSplits != null) {
                // lazy assignment
                @SuppressWarnings("unchecked")
                InputSplitSource<InputSplit> splitSource =
                        (InputSplitSource<InputSplit>) jobVertex.getInputSplitSource();
                this.splitAssigner = splitSource.getInputSplitAssigner(this.inputSplits);
            }
        } catch (Throwable t) {
            throw new RuntimeException(
                    "Re-creating the input split assigner failed: " + t.getMessage(), t);
        }

        // reset intermediate results
        for (IntermediateResult result : producedDataSets) {
            result.resetForNewExecution();
        }
    }
}
@Test
public void testBuildDeploymentDescriptor() {
    try {
        final JobID jobId = new JobID();

        final JobVertexID jid1 = new JobVertexID();
        final JobVertexID jid2 = new JobVertexID();
        final JobVertexID jid3 = new JobVertexID();
        final JobVertexID jid4 = new JobVertexID();

        JobVertex v1 = new JobVertex("v1", jid1);
        JobVertex v2 = new JobVertex("v2", jid2);
        JobVertex v3 = new JobVertex("v3", jid3);
        JobVertex v4 = new JobVertex("v4", jid4);

        v1.setParallelism(10);
        v2.setParallelism(10);
        v3.setParallelism(10);
        v4.setParallelism(10);

        v1.setInvokableClass(BatchTask.class);
        v2.setInvokableClass(BatchTask.class);
        v3.setInvokableClass(BatchTask.class);
        v4.setInvokableClass(BatchTask.class);

        v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL);
        v3.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL);
        v4.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL);

        ExecutionGraph eg =
                new ExecutionGraph(
                        TestingUtils.defaultExecutionContext(),
                        jobId,
                        "some job",
                        new Configuration(),
                        new SerializedValue<>(new ExecutionConfig()),
                        AkkaUtils.getDefaultTimeout(),
                        new NoRestartStrategy());

        List<JobVertex> ordered = Arrays.asList(v1, v2, v3, v4);

        eg.attachJobGraph(ordered);

        ExecutionJobVertex ejv = eg.getAllVertices().get(jid2);
        ExecutionVertex vertex = ejv.getTaskVertices()[3];

        ExecutionGraphTestUtils.SimpleActorGateway instanceGateway =
                new ExecutionGraphTestUtils.SimpleActorGateway(
                        TestingUtils.directExecutionContext());

        final Instance instance = getInstance(instanceGateway);

        final SimpleSlot slot = instance.allocateSimpleSlot(jobId);

        assertEquals(ExecutionState.CREATED, vertex.getExecutionState());

        vertex.deployToSlot(slot);

        assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());

        TaskDeploymentDescriptor descr = instanceGateway.lastTDD;
        assertNotNull(descr);

        assertEquals(jobId, descr.getJobID());
        assertEquals(jid2, descr.getVertexID());
        assertEquals(3, descr.getIndexInSubtaskGroup());
        assertEquals(10, descr.getNumberOfSubtasks());
        assertEquals(BatchTask.class.getName(), descr.getInvokableClassName());
        assertEquals("v2", descr.getTaskName());

        List<ResultPartitionDeploymentDescriptor> producedPartitions =
                descr.getProducedPartitions();
        List<InputGateDeploymentDescriptor> consumedPartitions = descr.getInputGates();

        assertEquals(2, producedPartitions.size());
        assertEquals(1, consumedPartitions.size());

        assertEquals(10, producedPartitions.get(0).getNumberOfSubpartitions());
        assertEquals(10, producedPartitions.get(1).getNumberOfSubpartitions());
        assertEquals(10, consumedPartitions.get(0).getInputChannelDeploymentDescriptors().length);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
/**
 * Tests that a blocking batch job fails if there are not enough resources left to schedule the
 * succeeding tasks. This test case is related to [FLINK-4296] where finished producing tasks
 * swallow the fail exception when scheduling a consumer task.
 */
@Test
public void testNoResourceAvailableFailure() throws Exception {
    final JobID jobId = new JobID();
    JobVertex v1 = new JobVertex("source");
    JobVertex v2 = new JobVertex("sink");

    int dop1 = 1;
    int dop2 = 1;

    v1.setParallelism(dop1);
    v2.setParallelism(dop2);

    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);

    v2.connectNewDataSetAsInput(
            v1, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING, false);

    // execution graph that executes actions synchronously
    ExecutionGraph eg =
            new ExecutionGraph(
                    TestingUtils.directExecutionContext(),
                    jobId,
                    "failing test job",
                    new Configuration(),
                    new SerializedValue<>(new ExecutionConfig()),
                    AkkaUtils.getDefaultTimeout(),
                    new NoRestartStrategy());

    eg.setQueuedSchedulingAllowed(false);

    List<JobVertex> ordered = Arrays.asList(v1, v2);
    eg.attachJobGraph(ordered);

    Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext());
    for (int i = 0; i < dop1; i++) {
        scheduler.newInstanceAvailable(
                ExecutionGraphTestUtils.getInstance(
                        new ExecutionGraphTestUtils.SimpleActorGateway(
                                TestingUtils.directExecutionContext())));
    }
    assertEquals(dop1, scheduler.getNumberOfAvailableSlots());

    // schedule, this triggers mock deployment
    eg.scheduleForExecution(scheduler);

    ExecutionAttemptID attemptID =
            eg.getJobVertex(v1.getID())
                    .getTaskVertices()[0]
                    .getCurrentExecutionAttempt()
                    .getAttemptId();
    eg.updateState(new TaskExecutionState(jobId, attemptID, ExecutionState.RUNNING));
    eg.updateState(
            new TaskExecutionState(
                    jobId,
                    attemptID,
                    ExecutionState.FINISHED,
                    null,
                    new AccumulatorSnapshot(
                            jobId,
                            attemptID,
                            new HashMap<AccumulatorRegistry.Metric, Accumulator<?, ?>>(),
                            new HashMap<String, Accumulator<?, ?>>())));

    assertEquals(JobStatus.FAILED, eg.getState());
}
@Override
public JobVertexID getJobVertexId() {
    return jobVertex.getID();
}
public ExecutionJobVertex(
        ExecutionGraph graph,
        JobVertex jobVertex,
        int defaultParallelism,
        Time timeout,
        long createTimestamp) throws JobException, IOException {

    if (graph == null || jobVertex == null) {
        throw new NullPointerException();
    }

    this.graph = graph;
    this.jobVertex = jobVertex;

    int vertexParallelism = jobVertex.getParallelism();
    int numTaskVertices = vertexParallelism > 0 ? vertexParallelism : defaultParallelism;

    this.parallelism = numTaskVertices;

    int maxP = jobVertex.getMaxParallelism();

    Preconditions.checkArgument(
            maxP >= parallelism,
            "The maximum parallelism ("
                    + maxP
                    + ") must be greater than or equal to the parallelism ("
                    + parallelism
                    + ").");
    this.maxParallelism = maxP;

    this.serializedTaskInformation =
            new SerializedValue<>(
                    new TaskInformation(
                            jobVertex.getID(),
                            jobVertex.getName(),
                            parallelism,
                            maxParallelism,
                            jobVertex.getInvokableClassName(),
                            jobVertex.getConfiguration()));

    this.taskVertices = new ExecutionVertex[numTaskVertices];

    this.inputs = new ArrayList<IntermediateResult>(jobVertex.getInputs().size());

    // take the sharing group
    this.slotSharingGroup = jobVertex.getSlotSharingGroup();
    this.coLocationGroup = jobVertex.getCoLocationGroup();

    // setup the coLocation group
    if (coLocationGroup != null && slotSharingGroup == null) {
        throw new JobException("Vertex uses a co-location constraint without using slot sharing");
    }

    // create the intermediate results
    this.producedDataSets =
            new IntermediateResult[jobVertex.getNumberOfProducedIntermediateDataSets()];

    for (int i = 0; i < jobVertex.getProducedDataSets().size(); i++) {
        final IntermediateDataSet result = jobVertex.getProducedDataSets().get(i);

        this.producedDataSets[i] =
                new IntermediateResult(
                        result.getId(), this, numTaskVertices, result.getResultType());
    }

    Configuration jobConfiguration = graph.getJobConfiguration();
    int maxPriorAttemptsHistoryLength =
            jobConfiguration != null
                    ? jobConfiguration.getInteger(JobManagerOptions.MAX_ATTEMPTS_HISTORY_SIZE)
                    : JobManagerOptions.MAX_ATTEMPTS_HISTORY_SIZE.defaultValue();

    // create all task vertices
    for (int i = 0; i < numTaskVertices; i++) {
        ExecutionVertex vertex =
                new ExecutionVertex(
                        this,
                        i,
                        this.producedDataSets,
                        timeout,
                        createTimestamp,
                        maxPriorAttemptsHistoryLength);

        this.taskVertices[i] = vertex;
    }

    // sanity check for the double referencing between intermediate result partitions and
    // execution vertices
    for (IntermediateResult ir : this.producedDataSets) {
        if (ir.getNumberOfAssignedPartitions() != parallelism) {
            throw new RuntimeException(
                    "The intermediate result's partitions were not correctly assigned.");
        }
    }

    // set up the input splits, if the vertex has any
    try {
        @SuppressWarnings("unchecked")
        InputSplitSource<InputSplit> splitSource =
                (InputSplitSource<InputSplit>) jobVertex.getInputSplitSource();

        if (splitSource != null) {
            Thread currentThread = Thread.currentThread();
            ClassLoader oldContextClassLoader = currentThread.getContextClassLoader();
            currentThread.setContextClassLoader(graph.getUserClassLoader());
            try {
                inputSplits = splitSource.createInputSplits(numTaskVertices);

                if (inputSplits != null) {
                    splitAssigner = splitSource.getInputSplitAssigner(inputSplits);
                }
            } finally {
                currentThread.setContextClassLoader(oldContextClassLoader);
            }
        } else {
            inputSplits = null;
        }
    } catch (Throwable t) {
        throw new JobException("Creating the input splits caused an error: " + t.getMessage(), t);
    }

    finishedSubtasks = new boolean[parallelism];
}
public static JobGraph getJobGraph(String[] args) throws Exception {

    int parallelism = 2;
    String pageWithRankInputPath =
            ""; // "file://" + PlayConstants.PLAY_DIR + "test-inputs/danglingpagerank/pageWithRank";
    String adjacencyListInputPath =
            ""; // "file://" + PlayConstants.PLAY_DIR +
    // "test-inputs/danglingpagerank/adjacencylists";
    String outputPath =
            OperatingSystem.isWindows()
                    ? "file:/c:/tmp/flink/iterations"
                    : "file:///tmp/flink/iterations";
    // String confPath = PlayConstants.PLAY_DIR + "local-conf";
    int minorConsumer = 25;
    int matchMemory = 50;
    int coGroupSortMemory = 50;
    int numIterations = 25;
    long numVertices = 5;
    long numDanglingVertices = 1;

    String failingWorkers = "1";
    int failingIteration = 2;
    double messageLoss = 0.75;

    if (args.length >= 15) {
        parallelism = Integer.parseInt(args[0]);
        pageWithRankInputPath = args[1];
        adjacencyListInputPath = args[2];
        outputPath = args[3];
        // confPath = args[4];
        minorConsumer = Integer.parseInt(args[5]);
        matchMemory = Integer.parseInt(args[6]);
        coGroupSortMemory = Integer.parseInt(args[7]);
        numIterations = Integer.parseInt(args[8]);
        numVertices = Long.parseLong(args[9]);
        numDanglingVertices = Long.parseLong(args[10]);
        failingWorkers = args[11];
        failingIteration = Integer.parseInt(args[12]);
        messageLoss = Double.parseDouble(args[13]);
    }

    int totalMemoryConsumption = 3 * minorConsumer + matchMemory + coGroupSortMemory;

    JobGraph jobGraph = new JobGraph("CompensatableDanglingPageRank");

    // --------------- the inputs ---------------------

    // page rank input
    InputFormatVertex pageWithRankInput =
            JobGraphUtils.createInput(
                    new ImprovedDanglingPageRankInputFormat(),
                    pageWithRankInputPath,
                    "DanglingPageWithRankInput",
                    jobGraph,
                    parallelism);
    TaskConfig pageWithRankInputConfig = new TaskConfig(pageWithRankInput.getConfiguration());
    pageWithRankInputConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    pageWithRankInputConfig.setOutputComparator(fieldZeroComparator, 0);
    pageWithRankInputConfig.setOutputSerializer(recSerializer);
    pageWithRankInputConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));

    // edges as adjacency list
    InputFormatVertex adjacencyListInput =
            JobGraphUtils.createInput(
                    new ImprovedAdjacencyListInputFormat(),
                    adjacencyListInputPath,
                    "AdjacencyListInput",
                    jobGraph,
                    parallelism);
    TaskConfig adjacencyListInputConfig = new TaskConfig(adjacencyListInput.getConfiguration());
    adjacencyListInputConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    adjacencyListInputConfig.setOutputSerializer(recSerializer);
    adjacencyListInputConfig.setOutputComparator(fieldZeroComparator, 0);

    // --------------- the head ---------------------

    JobVertex head =
            JobGraphUtils.createTask(
                    IterationHeadPactTask.class, "IterationHead", jobGraph, parallelism);
    TaskConfig headConfig = new TaskConfig(head.getConfiguration());
    headConfig.setIterationId(ITERATION_ID);

    // initial input / partial solution
    headConfig.addInputToGroup(0);
    headConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
    headConfig.setInputSerializer(recSerializer, 0);
    headConfig.setInputComparator(fieldZeroComparator, 0);
    headConfig.setInputLocalStrategy(0, LocalStrategy.SORT);
    headConfig.setRelativeMemoryInput(0, (double) minorConsumer / totalMemoryConsumption);
    headConfig.setFilehandlesInput(0, NUM_FILE_HANDLES_PER_SORT);
    headConfig.setSpillingThresholdInput(0, SORT_SPILL_THRESHOLD);

    // back channel / iterations
    headConfig.setRelativeBackChannelMemory((double) minorConsumer / totalMemoryConsumption);

    // output into iteration
    headConfig.setOutputSerializer(recSerializer);
    headConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    headConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);

    // final output
    TaskConfig headFinalOutConfig = new TaskConfig(new Configuration());
    headFinalOutConfig.setOutputSerializer(recSerializer);
    headFinalOutConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    headConfig.setIterationHeadFinalOutputConfig(headFinalOutConfig);

    // the sync
    headConfig.setIterationHeadIndexOfSyncOutput(3);
    headConfig.setNumberOfIterations(numIterations);

    // the driver
    headConfig.setDriver(CollectorMapDriver.class);
    headConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
    headConfig.setStubWrapper(new UserCodeClassWrapper<CompensatingMap>(CompensatingMap.class));
    headConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    headConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    headConfig.setStubParameter("compensation.failingIteration", String.valueOf(failingIteration));
    headConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));
    headConfig.addIterationAggregator(
            CompensatableDotProductCoGroup.AGGREGATOR_NAME, new PageRankStatsAggregator());

    // --------------- the join ---------------------

    JobVertex intermediate =
            JobGraphUtils.createTask(
                    IterationIntermediatePactTask.class,
                    "IterationIntermediate",
                    jobGraph,
                    parallelism);
    TaskConfig intermediateConfig = new TaskConfig(intermediate.getConfiguration());
    intermediateConfig.setIterationId(ITERATION_ID);
    // intermediateConfig.setDriver(RepeatableHashjoinMatchDriverWithCachedBuildside.class);
    intermediateConfig.setDriver(BuildSecondCachedMatchDriver.class);
    intermediateConfig.setDriverStrategy(DriverStrategy.HYBRIDHASH_BUILD_SECOND);
    intermediateConfig.setRelativeMemoryDriver((double) matchMemory / totalMemoryConsumption);
    intermediateConfig.addInputToGroup(0);
    intermediateConfig.addInputToGroup(1);
    intermediateConfig.setInputSerializer(recSerializer, 0);
    intermediateConfig.setInputSerializer(recSerializer, 1);
    intermediateConfig.setDriverComparator(fieldZeroComparator, 0);
    intermediateConfig.setDriverComparator(fieldZeroComparator, 1);
    intermediateConfig.setDriverPairComparator(pairComparatorFactory);
    intermediateConfig.setOutputSerializer(recSerializer);
    intermediateConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    intermediateConfig.setOutputComparator(fieldZeroComparator, 0);
    intermediateConfig.setStubWrapper(
            new UserCodeClassWrapper<CompensatableDotProductMatch>(
                    CompensatableDotProductMatch.class));
    intermediateConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    intermediateConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    intermediateConfig.setStubParameter(
            "compensation.failingIteration", String.valueOf(failingIteration));
    intermediateConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));

    // ---------------- the tail (co group) --------------------

    JobVertex tail =
            JobGraphUtils.createTask(
                    IterationTailPactTask.class, "IterationTail", jobGraph, parallelism);
    TaskConfig tailConfig = new TaskConfig(tail.getConfiguration());
    tailConfig.setIterationId(ITERATION_ID);
    tailConfig.setIsWorksetUpdate();
    // TODO we need to combine!
    // inputs and driver
    tailConfig.setDriver(CoGroupDriver.class);
    tailConfig.setDriverStrategy(DriverStrategy.CO_GROUP);
    tailConfig.addInputToGroup(0);
    tailConfig.addInputToGroup(1);
    tailConfig.setInputSerializer(recSerializer, 0);
    tailConfig.setInputSerializer(recSerializer, 1);
    tailConfig.setDriverComparator(fieldZeroComparator, 0);
    tailConfig.setDriverComparator(fieldZeroComparator, 1);
    tailConfig.setDriverPairComparator(pairComparatorFactory);
    tailConfig.setInputAsynchronouslyMaterialized(0, true);
    tailConfig.setRelativeInputMaterializationMemory(
            0, (double) minorConsumer / totalMemoryConsumption);
    tailConfig.setInputLocalStrategy(1, LocalStrategy.SORT);
    tailConfig.setInputComparator(fieldZeroComparator, 1);
    tailConfig.setRelativeMemoryInput(1, (double) coGroupSortMemory / totalMemoryConsumption);
    tailConfig.setFilehandlesInput(1, NUM_FILE_HANDLES_PER_SORT);
    tailConfig.setSpillingThresholdInput(1, SORT_SPILL_THRESHOLD);

    // output
    tailConfig.setOutputSerializer(recSerializer);

    // the stub
    tailConfig.setStubWrapper(
            new UserCodeClassWrapper<CompensatableDotProductCoGroup>(
                    CompensatableDotProductCoGroup.class));
    tailConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    tailConfig.setStubParameter(
            "pageRank.numDanglingVertices", String.valueOf(numDanglingVertices));
    tailConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    tailConfig.setStubParameter("compensation.failingIteration", String.valueOf(failingIteration));
    tailConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));

    // --------------- the output ---------------------

    OutputFormatVertex output =
            JobGraphUtils.createFileOutput(jobGraph, "FinalOutput", parallelism);
    TaskConfig outputConfig = new TaskConfig(output.getConfiguration());
    outputConfig.addInputToGroup(0);
    outputConfig.setInputSerializer(recSerializer, 0);
    outputConfig.setStubWrapper(
            new UserCodeClassWrapper<PageWithRankOutFormat>(PageWithRankOutFormat.class));
    outputConfig.setStubParameter(FileOutputFormat.FILE_PARAMETER_KEY, outputPath);

    // --------------- the auxiliaries ---------------------

    JobVertex sync = JobGraphUtils.createSync(jobGraph, parallelism);
    TaskConfig syncConfig = new TaskConfig(sync.getConfiguration());
    syncConfig.setNumberOfIterations(numIterations);
    syncConfig.addIterationAggregator(
            CompensatableDotProductCoGroup.AGGREGATOR_NAME, new PageRankStatsAggregator());
    syncConfig.setConvergenceCriterion(
            CompensatableDotProductCoGroup.AGGREGATOR_NAME, new DiffL1NormConvergenceCriterion());
    syncConfig.setIterationId(ITERATION_ID);

    // --------------- the wiring ---------------------

    JobGraphUtils.connect(pageWithRankInput, head, DistributionPattern.ALL_TO_ALL);

    JobGraphUtils.connect(head, intermediate, DistributionPattern.POINTWISE);
    intermediateConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);

    JobGraphUtils.connect(adjacencyListInput, intermediate, DistributionPattern.ALL_TO_ALL);

    JobGraphUtils.connect(head, tail, DistributionPattern.POINTWISE);
    JobGraphUtils.connect(intermediate, tail, DistributionPattern.ALL_TO_ALL);
    tailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
    tailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(1, parallelism);

    JobGraphUtils.connect(head, output, DistributionPattern.POINTWISE);
    JobGraphUtils.connect(head, sync, DistributionPattern.POINTWISE);

    SlotSharingGroup sharingGroup = new SlotSharingGroup();
    pageWithRankInput.setSlotSharingGroup(sharingGroup);
    adjacencyListInput.setSlotSharingGroup(sharingGroup);
    head.setSlotSharingGroup(sharingGroup);
    intermediate.setSlotSharingGroup(sharingGroup);
    tail.setSlotSharingGroup(sharingGroup);
    output.setSlotSharingGroup(sharingGroup);
    sync.setSlotSharingGroup(sharingGroup);

    tail.setStrictlyCoLocatedWith(head);
    intermediate.setStrictlyCoLocatedWith(head);

    return jobGraph;
}
/*
 * Test setup:
 * - v1 is isolated, no slot sharing
 * - v2 and v3 (not connected) share slots
 * - v4 and v5 (connected) share slots
 */
@Test
public void testAssignSlotSharingGroup() {
    try {
        JobVertex v1 = new JobVertex("v1");
        JobVertex v2 = new JobVertex("v2");
        JobVertex v3 = new JobVertex("v3");
        JobVertex v4 = new JobVertex("v4");
        JobVertex v5 = new JobVertex("v5");

        v1.setParallelism(4);
        v2.setParallelism(5);
        v3.setParallelism(7);
        v4.setParallelism(1);
        v5.setParallelism(11);

        v2.connectNewDataSetAsInput(v1, DistributionPattern.POINTWISE);
        v5.connectNewDataSetAsInput(v4, DistributionPattern.POINTWISE);

        SlotSharingGroup jg1 = new SlotSharingGroup();
        v2.setSlotSharingGroup(jg1);
        v3.setSlotSharingGroup(jg1);

        SlotSharingGroup jg2 = new SlotSharingGroup();
        v4.setSlotSharingGroup(jg2);
        v5.setSlotSharingGroup(jg2);

        List<JobVertex> vertices = new ArrayList<JobVertex>(Arrays.asList(v1, v2, v3, v4, v5));

        ExecutionGraph eg =
                new ExecutionGraph(
                        TestingUtils.defaultExecutionContext(),
                        new JobID(),
                        "test job",
                        new Configuration(),
                        ExecutionConfigTest.getSerializedConfig(),
                        AkkaUtils.getDefaultTimeout(),
                        new NoRestartStrategy());
        eg.attachJobGraph(vertices);

        // verify that the vertices ended up in the expected slot sharing groups
        SlotSharingGroup group1 = null;
        SlotSharingGroup group2 = null;

        // v1 tasks have no slot sharing group
        assertNull(eg.getJobVertex(v1.getID()).getSlotSharingGroup());

        // v2 and v3 are shared
        group1 = eg.getJobVertex(v2.getID()).getSlotSharingGroup();
        assertNotNull(group1);
        assertEquals(group1, eg.getJobVertex(v3.getID()).getSlotSharingGroup());

        assertEquals(2, group1.getJobVertexIds().size());
        assertTrue(group1.getJobVertexIds().contains(v2.getID()));
        assertTrue(group1.getJobVertexIds().contains(v3.getID()));

        // v4 and v5 are shared
        group2 = eg.getJobVertex(v4.getID()).getSlotSharingGroup();
        assertNotNull(group2);
        assertEquals(group2, eg.getJobVertex(v5.getID()).getSlotSharingGroup());

        assertEquals(2, group2.getJobVertexIds().size());
        assertTrue(group2.getJobVertexIds().contains(v4.getID()));
        assertTrue(group2.getJobVertexIds().contains(v5.getID()));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}