private JobGraph createTestJobGraph(
        String jobName, int senderParallelism, int receiverParallelism) {

    // The sender and receiver invokable logic ensure that each subtask gets the expected data
    final JobVertex sender = new JobVertex("Sender");
    sender.setInvokableClass(RoundRobinSubtaskIndexSender.class);
    sender.getConfiguration()
            .setInteger(RoundRobinSubtaskIndexSender.CONFIG_KEY, receiverParallelism);
    sender.setParallelism(senderParallelism);

    final JobVertex receiver = new JobVertex("Receiver");
    receiver.setInvokableClass(SubtaskIndexReceiver.class);
    receiver.getConfiguration().setInteger(SubtaskIndexReceiver.CONFIG_KEY, senderParallelism);
    receiver.setParallelism(receiverParallelism);

    receiver.connectNewDataSetAsInput(
            sender, DistributionPattern.ALL_TO_ALL, ResultPartitionType.BLOCKING);

    final JobGraph jobGraph = new JobGraph(jobName, sender, receiver);

    // We need to allow queued scheduling, because there are not enough slots available
    // to run all tasks at once. We queue tasks and then let them finish/consume the blocking
    // result one after the other.
    jobGraph.setAllowQueuedScheduling(true);

    return jobGraph;
}
/**
 * Verifies that a correct error message is produced when vertices with master initialization
 * (input formats / output formats) fail.
 */
@Test
public void testFailureWhenInitializeOnMasterFails() {
    try {
        // create a simple job graph whose only vertex fails during master-side initialization
        JobVertex jobVertex =
                new JobVertex("Vertex that fails in initializeOnMaster") {

                    @Override
                    public void initializeOnMaster(ClassLoader loader) throws Exception {
                        throw new RuntimeException("test exception");
                    }
                };

        jobVertex.setInvokableClass(Tasks.NoOpInvokable.class);
        JobGraph jg = new JobGraph("test job", jobVertex);

        // submit the job
        Future<Object> submitFuture =
                jmGateway.ask(
                        new JobManagerMessages.SubmitJob(jg, ListeningBehaviour.EXECUTION_RESULT),
                        timeout);
        try {
            Await.result(submitFuture, timeout);
        } catch (JobExecutionException e) {
            // that is what we expect
            // test that the exception nesting is not too deep
            assertTrue(e.getCause() instanceof RuntimeException);
        } catch (Exception e) {
            fail("Wrong exception type");
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
private Map<ExecutionAttemptID, Execution> setupExecution(
        JobVertex v1, int dop1, JobVertex v2, int dop2) throws Exception {
    final JobID jobId = new JobID();

    v1.setParallelism(dop1);
    v2.setParallelism(dop2);

    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);

    // execution graph that executes actions synchronously
    ExecutionGraph eg =
            new ExecutionGraph(
                    TestingUtils.directExecutionContext(),
                    jobId,
                    "some job",
                    new Configuration(),
                    new SerializedValue<>(new ExecutionConfig()),
                    AkkaUtils.getDefaultTimeout(),
                    new NoRestartStrategy());

    eg.setQueuedSchedulingAllowed(false);

    List<JobVertex> ordered = Arrays.asList(v1, v2);
    eg.attachJobGraph(ordered);

    Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    for (int i = 0; i < dop1 + dop2; i++) {
        scheduler.newInstanceAvailable(
                ExecutionGraphTestUtils.getInstance(
                        new ExecutionGraphTestUtils.SimpleActorGateway(
                                TestingUtils.directExecutionContext())));
    }
    assertEquals(dop1 + dop2, scheduler.getNumberOfAvailableSlots());

    // schedule, this triggers mock deployment
    eg.scheduleForExecution(scheduler);

    Map<ExecutionAttemptID, Execution> executions = eg.getRegisteredExecutions();
    assertEquals(dop1 + dop2, executions.size());

    return executions;
}
public JobGraph createBlockingJob(int parallelism) {
    Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(true);

    JobVertex sender = new JobVertex("sender");
    JobVertex receiver = new JobVertex("receiver");

    sender.setInvokableClass(Tasks.Sender.class);
    receiver.setInvokableClass(Tasks.BlockingOnceReceiver.class);

    sender.setParallelism(parallelism);
    receiver.setParallelism(parallelism);

    receiver.connectNewDataSetAsInput(sender, DistributionPattern.POINTWISE);

    SlotSharingGroup slotSharingGroup = new SlotSharingGroup();
    sender.setSlotSharingGroup(slotSharingGroup);
    receiver.setSlotSharingGroup(slotSharingGroup);

    return new JobGraph("Blocking test job", sender, receiver);
}
@Test
public void testFailureWhenJarBlobsMissing() {
    try {
        // create a simple job graph
        JobVertex jobVertex = new JobVertex("Test Vertex");
        jobVertex.setInvokableClass(Tasks.NoOpInvokable.class);
        JobGraph jg = new JobGraph("test job", jobVertex);

        // request the blob port from the job manager
        Future<Object> future =
                jmGateway.ask(JobManagerMessages.getRequestBlobManagerPort(), timeout);
        int blobPort = (Integer) Await.result(future, timeout);

        // upload two dummy blobs and add their keys to the job graph as dependencies
        BlobKey key1, key2;
        BlobClient bc = new BlobClient(new InetSocketAddress("localhost", blobPort));
        try {
            key1 = bc.put(new byte[10]);
            key2 = bc.put(new byte[10]);

            // delete one of the blobs to make sure that the job startup fails
            bc.delete(key2);
        } finally {
            bc.close();
        }

        jg.addBlob(key1);
        jg.addBlob(key2);

        // submit the job
        Future<Object> submitFuture =
                jmGateway.ask(
                        new JobManagerMessages.SubmitJob(jg, ListeningBehaviour.EXECUTION_RESULT),
                        timeout);
        try {
            Await.result(submitFuture, timeout);
        } catch (JobExecutionException e) {
            // that is what we expect: the missing blob surfaces as an IOException
            assertTrue(e.getCause() instanceof IOException);
        } catch (Exception e) {
            fail("Wrong exception type");
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
@Test
public void testBuildDeploymentDescriptor() {
    try {
        final JobID jobId = new JobID();

        final JobVertexID jid1 = new JobVertexID();
        final JobVertexID jid2 = new JobVertexID();
        final JobVertexID jid3 = new JobVertexID();
        final JobVertexID jid4 = new JobVertexID();

        JobVertex v1 = new JobVertex("v1", jid1);
        JobVertex v2 = new JobVertex("v2", jid2);
        JobVertex v3 = new JobVertex("v3", jid3);
        JobVertex v4 = new JobVertex("v4", jid4);

        v1.setParallelism(10);
        v2.setParallelism(10);
        v3.setParallelism(10);
        v4.setParallelism(10);

        v1.setInvokableClass(BatchTask.class);
        v2.setInvokableClass(BatchTask.class);
        v3.setInvokableClass(BatchTask.class);
        v4.setInvokableClass(BatchTask.class);

        // v2 consumes v1 and is consumed by both v3 and v4, so it produces two result partitions
        v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL);
        v3.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL);
        v4.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL);

        ExecutionGraph eg =
                new ExecutionGraph(
                        TestingUtils.defaultExecutionContext(),
                        jobId,
                        "some job",
                        new Configuration(),
                        new SerializedValue<>(new ExecutionConfig()),
                        AkkaUtils.getDefaultTimeout(),
                        new NoRestartStrategy());

        List<JobVertex> ordered = Arrays.asList(v1, v2, v3, v4);
        eg.attachJobGraph(ordered);

        // deploy the fourth subtask of v2 into a slot backed by a mock gateway
        ExecutionJobVertex ejv = eg.getAllVertices().get(jid2);
        ExecutionVertex vertex = ejv.getTaskVertices()[3];

        ExecutionGraphTestUtils.SimpleActorGateway instanceGateway =
                new ExecutionGraphTestUtils.SimpleActorGateway(TestingUtils.directExecutionContext());

        final Instance instance = getInstance(instanceGateway);
        final SimpleSlot slot = instance.allocateSimpleSlot(jobId);

        assertEquals(ExecutionState.CREATED, vertex.getExecutionState());

        vertex.deployToSlot(slot);

        assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());

        // the mock gateway captures the deployment descriptor of the last deployed task
        TaskDeploymentDescriptor descr = instanceGateway.lastTDD;
        assertNotNull(descr);

        assertEquals(jobId, descr.getJobID());
        assertEquals(jid2, descr.getVertexID());
        assertEquals(3, descr.getIndexInSubtaskGroup());
        assertEquals(10, descr.getNumberOfSubtasks());
        assertEquals(BatchTask.class.getName(), descr.getInvokableClassName());
        assertEquals("v2", descr.getTaskName());

        List<ResultPartitionDeploymentDescriptor> producedPartitions = descr.getProducedPartitions();
        List<InputGateDeploymentDescriptor> consumedPartitions = descr.getInputGates();

        assertEquals(2, producedPartitions.size());
        assertEquals(1, consumedPartitions.size());

        assertEquals(10, producedPartitions.get(0).getNumberOfSubpartitions());
        assertEquals(10, producedPartitions.get(1).getNumberOfSubpartitions());
        assertEquals(10, consumedPartitions.get(0).getInputChannelDeploymentDescriptors().length);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
/**
 * Tests that a blocking batch job fails if there are not enough resources left to schedule the
 * succeeding tasks. This test case is related to [FLINK-4296] where finished producing tasks
 * swallow the fail exception when scheduling a consumer task.
 */
@Test
public void testNoResourceAvailableFailure() throws Exception {
    final JobID jobId = new JobID();
    JobVertex v1 = new JobVertex("source");
    JobVertex v2 = new JobVertex("sink");

    int dop1 = 1;
    int dop2 = 1;

    v1.setParallelism(dop1);
    v2.setParallelism(dop2);

    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);

    v2.connectNewDataSetAsInput(
            v1, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING, false);

    // execution graph that executes actions synchronously
    ExecutionGraph eg =
            new ExecutionGraph(
                    TestingUtils.directExecutionContext(),
                    jobId,
                    "failing test job",
                    new Configuration(),
                    new SerializedValue<>(new ExecutionConfig()),
                    AkkaUtils.getDefaultTimeout(),
                    new NoRestartStrategy());

    eg.setQueuedSchedulingAllowed(false);

    List<JobVertex> ordered = Arrays.asList(v1, v2);
    eg.attachJobGraph(ordered);

    // provide only enough slots for the source, so scheduling the sink must fail later
    Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext());
    for (int i = 0; i < dop1; i++) {
        scheduler.newInstanceAvailable(
                ExecutionGraphTestUtils.getInstance(
                        new ExecutionGraphTestUtils.SimpleActorGateway(
                                TestingUtils.directExecutionContext())));
    }
    assertEquals(dop1, scheduler.getNumberOfAvailableSlots());

    // schedule, this triggers mock deployment
    eg.scheduleForExecution(scheduler);

    // drive the source task to FINISHED; consuming the blocking result then needs a new slot
    ExecutionAttemptID attemptID =
            eg.getJobVertex(v1.getID())
                    .getTaskVertices()[0]
                    .getCurrentExecutionAttempt()
                    .getAttemptId();
    eg.updateState(new TaskExecutionState(jobId, attemptID, ExecutionState.RUNNING));
    eg.updateState(
            new TaskExecutionState(
                    jobId,
                    attemptID,
                    ExecutionState.FINISHED,
                    null,
                    new AccumulatorSnapshot(
                            jobId,
                            attemptID,
                            new HashMap<AccumulatorRegistry.Metric, Accumulator<?, ?>>(),
                            new HashMap<String, Accumulator<?, ?>>())));

    assertEquals(JobStatus.FAILED, eg.getState());
}
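// A hypothetical helper (not part of the original tests) sketching how a test could wait for an
// asynchronously executing ExecutionGraph to reach an expected JobStatus instead of asserting it
// immediately, as testNoResourceAvailableFailure() can do thanks to the direct execution context.
// Minimal sketch: it assumes only ExecutionGraph#getState() as used above; the helper name and
// polling interval are made up for illustration.
private static void waitForJobStatus(ExecutionGraph eg, JobStatus expected, long timeoutMillis)
        throws InterruptedException {
    final long deadline = System.currentTimeMillis() + timeoutMillis;
    // poll until the graph reaches the expected status or the deadline expires
    while (eg.getState() != expected && System.currentTimeMillis() < deadline) {
        Thread.sleep(10);
    }
    assertEquals(expected, eg.getState());
}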