/**
 * Builds a scheduler and registers {@code numInstances} instances with it.
 *
 * <p>The instance at zero-based position {@code i} is created with the IP address
 * {@code 10.0.1.(1 + i)}, the data port {@code 12001 + i} and the host name
 * {@code "host" + (i + 1)}.
 *
 * @param numInstances number of instances to create and register
 * @param numSlotsPerInstance slot count handed to {@code getInstance} for every instance
 * @return the scheduler after all instances have been registered
 * @throws Exception if creating or registering an instance fails
 */
private static Scheduler getScheduler(int numInstances, int numSlotsPerInstance) throws Exception {
    final Scheduler result = new Scheduler();

    int index = 0;
    while (index < numInstances) {
        // Host names and the last IP octet are one-based, the port offset is zero-based.
        final int ordinal = index + 1;
        final byte[] address = {10, 0, 1, (byte) ordinal};
        final Instance created =
                getInstance(address, 12001 + index, "host" + ordinal, numSlotsPerInstance);
        result.newInstanceAvailable(created);
        index++;
    }

    return result;
}
private Map<ExecutionAttemptID, Execution> setupExecution( JobVertex v1, int dop1, JobVertex v2, int dop2) throws Exception { final JobID jobId = new JobID(); v1.setParallelism(dop1); v2.setParallelism(dop2); v1.setInvokableClass(BatchTask.class); v2.setInvokableClass(BatchTask.class); // execution graph that executes actions synchronously ExecutionGraph eg = new ExecutionGraph( TestingUtils.directExecutionContext(), jobId, "some job", new Configuration(), new SerializedValue<>(new ExecutionConfig()), AkkaUtils.getDefaultTimeout(), new NoRestartStrategy()); eg.setQueuedSchedulingAllowed(false); List<JobVertex> ordered = Arrays.asList(v1, v2); eg.attachJobGraph(ordered); Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext()); for (int i = 0; i < dop1 + dop2; i++) { scheduler.newInstanceAvailable( ExecutionGraphTestUtils.getInstance( new ExecutionGraphTestUtils.SimpleActorGateway( TestingUtils.directExecutionContext()))); } assertEquals(dop1 + dop2, scheduler.getNumberOfAvailableSlots()); // schedule, this triggers mock deployment eg.scheduleForExecution(scheduler); Map<ExecutionAttemptID, Execution> executions = eg.getRegisteredExecutions(); assertEquals(dop1 + dop2, executions.size()); return executions; }
/**
 * Schedules a vertex with six subtasks and six input splits (two splits each naming
 * "host1", "host2" and "host3") onto six single-slot instances, two per host, and verifies
 * that each pair of subtasks is placed on one of the two instances of the matching host.
 */
@Test
public void testMultipleInstancesPerHost() {
    TestLocatableInputSplit[] splits =
            new TestLocatableInputSplit[] {
                new TestLocatableInputSplit(1, "host1"),
                new TestLocatableInputSplit(2, "host1"),
                new TestLocatableInputSplit(3, "host2"),
                new TestLocatableInputSplit(4, "host2"),
                new TestLocatableInputSplit(5, "host3"),
                new TestLocatableInputSplit(6, "host3")
            };

    try {
        AbstractJobVertex vertex = new AbstractJobVertex("test vertex");
        vertex.setParallelism(6);
        vertex.setInvokableClass(DummyInvokable.class);
        vertex.setInputSplitSource(new TestInputSplitSource(splits));

        JobGraph jobGraph = new JobGraph("test job", vertex);

        ExecutionGraph eg =
                new ExecutionGraph(
                        jobGraph.getJobID(),
                        jobGraph.getName(),
                        jobGraph.getJobConfiguration(),
                        TIMEOUT);

        eg.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());
        eg.setQueuedSchedulingAllowed(false);

        // create a scheduler with 6 instances where always two are on the same host
        Scheduler scheduler = new Scheduler();
        Instance i1 = getInstance(new byte[] {10, 0, 1, 1}, 12345, "host1", 1);
        Instance i2 = getInstance(new byte[] {10, 0, 1, 1}, 12346, "host1", 1);
        Instance i3 = getInstance(new byte[] {10, 0, 1, 2}, 12345, "host2", 1);
        Instance i4 = getInstance(new byte[] {10, 0, 1, 2}, 12346, "host2", 1);
        Instance i5 = getInstance(new byte[] {10, 0, 1, 3}, 12345, "host3", 1);
        // Second instance on host3: same IP as i5, different port. It must carry the same
        // host name as i5, otherwise no pair of instances exists for the two "host3" splits.
        Instance i6 = getInstance(new byte[] {10, 0, 1, 3}, 12346, "host3", 1);

        scheduler.newInstanceAvailable(i1);
        scheduler.newInstanceAvailable(i2);
        scheduler.newInstanceAvailable(i3);
        scheduler.newInstanceAvailable(i4);
        scheduler.newInstanceAvailable(i5);
        scheduler.newInstanceAvailable(i6);

        eg.scheduleForExecution(scheduler);

        ExecutionVertex[] tasks =
                eg.getVerticesTopologically().iterator().next().getTaskVertices();
        assertEquals(6, tasks.length);

        Instance taskInstance1 = tasks[0].getCurrentAssignedResource().getInstance();
        Instance taskInstance2 = tasks[1].getCurrentAssignedResource().getInstance();
        Instance taskInstance3 = tasks[2].getCurrentAssignedResource().getInstance();
        Instance taskInstance4 = tasks[3].getCurrentAssignedResource().getInstance();
        Instance taskInstance5 = tasks[4].getCurrentAssignedResource().getInstance();
        Instance taskInstance6 = tasks[5].getCurrentAssignedResource().getInstance();

        // identity comparison on purpose: each task must sit on one of the exact instances
        // registered for its host
        assertTrue(taskInstance1 == i1 || taskInstance1 == i2);
        assertTrue(taskInstance2 == i1 || taskInstance2 == i2);
        assertTrue(taskInstance3 == i3 || taskInstance3 == i4);
        assertTrue(taskInstance4 == i3 || taskInstance4 == i4);
        assertTrue(taskInstance5 == i5 || taskInstance5 == i6);
        assertTrue(taskInstance6 == i5 || taskInstance6 == i6);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
/**
 * Tests that a blocking batch job fails if there are not enough resources left to schedule the
 * succeeding tasks. This test case is related to [FLINK-4296], where finished producing tasks
 * swallowed the failure exception when scheduling a consumer task.
 *
 * <p>Only as many instances as the source needs are registered at the scheduler. The source
 * attempt is then reported as RUNNING and FINISHED, after which the job is expected to be in
 * state FAILED.
 */
@Test
public void testNoResourceAvailableFailure() throws Exception {
    final int sourceParallelism = 1;
    final int sinkParallelism = 1;
    final JobID jobId = new JobID();

    final JobVertex source = new JobVertex("source");
    final JobVertex sink = new JobVertex("sink");

    source.setParallelism(sourceParallelism);
    source.setInvokableClass(BatchTask.class);
    sink.setParallelism(sinkParallelism);
    sink.setInvokableClass(BatchTask.class);

    sink.connectNewDataSetAsInput(
            source, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING, false);

    // execution graph that executes actions synchronously
    final ExecutionGraph graph =
            new ExecutionGraph(
                    TestingUtils.directExecutionContext(),
                    jobId,
                    "failing test job",
                    new Configuration(),
                    new SerializedValue<>(new ExecutionConfig()),
                    AkkaUtils.getDefaultTimeout(),
                    new NoRestartStrategy());
    graph.setQueuedSchedulingAllowed(false);

    final List<JobVertex> topologicalOrder = Arrays.asList(source, sink);
    graph.attachJobGraph(topologicalOrder);

    // register instances for the source only
    final Scheduler slotProvider = new Scheduler(TestingUtils.directExecutionContext());
    for (int remaining = sourceParallelism; remaining > 0; remaining--) {
        slotProvider.newInstanceAvailable(
                ExecutionGraphTestUtils.getInstance(
                        new ExecutionGraphTestUtils.SimpleActorGateway(
                                TestingUtils.directExecutionContext())));
    }
    assertEquals(sourceParallelism, slotProvider.getNumberOfAvailableSlots());

    // schedule, this triggers mock deployment
    graph.scheduleForExecution(slotProvider);

    final ExecutionAttemptID sourceAttempt =
            graph.getJobVertex(source.getID())
                    .getTaskVertices()[0]
                    .getCurrentExecutionAttempt()
                    .getAttemptId();

    graph.updateState(new TaskExecutionState(jobId, sourceAttempt, ExecutionState.RUNNING));

    final AccumulatorSnapshot emptyAccumulators =
            new AccumulatorSnapshot(
                    jobId,
                    sourceAttempt,
                    new HashMap<AccumulatorRegistry.Metric, Accumulator<?, ?>>(),
                    new HashMap<String, Accumulator<?, ?>>());
    graph.updateState(
            new TaskExecutionState(
                    jobId, sourceAttempt, ExecutionState.FINISHED, null, emptyAccumulators));

    assertEquals(JobStatus.FAILED, graph.getState());
}
/** * NOTE: This method only throws exceptions if it is in an illegal state to be scheduled, or if * the tasks needs to be scheduled immediately and no resource is available. If the task is * accepted by the schedule, any error sets the vertex state to failed and triggers the recovery * logic. * * @param scheduler The scheduler to use to schedule this execution attempt. * @param queued Flag to indicate whether the scheduler may queue this task if it cannot * immediately deploy it. * @throws IllegalStateException Thrown, if the vertex is not in CREATED state, which is the only * state that permits scheduling. * @throws NoResourceAvailableException Thrown is no queued scheduling is allowed and no resources * are currently available. */ public boolean scheduleForExecution(Scheduler scheduler, boolean queued) throws NoResourceAvailableException { if (scheduler == null) { throw new IllegalArgumentException("Cannot send null Scheduler when scheduling execution."); } final SlotSharingGroup sharingGroup = vertex.getJobVertex().getSlotSharingGroup(); final CoLocationConstraint locationConstraint = vertex.getLocationConstraint(); // sanity check if (locationConstraint != null && sharingGroup == null) { throw new RuntimeException( "Trying to schedule with co-location constraint but without slot sharing allowed."); } if (transitionState(CREATED, SCHEDULED)) { ScheduledUnit toSchedule = locationConstraint == null ? new ScheduledUnit(this, sharingGroup) : new ScheduledUnit(this, sharingGroup, locationConstraint); // IMPORTANT: To prevent leaks of cluster resources, we need to make sure that slots are // returned // in all cases where the deployment failed. 
we use many try {} finally {} clauses to // assure that if (queued) { SlotAllocationFuture future = scheduler.scheduleQueued(toSchedule); future.setFutureAction( new SlotAllocationFutureAction() { @Override public void slotAllocated(SimpleSlot slot) { try { deployToSlot(slot); } catch (Throwable t) { try { slot.releaseSlot(); } finally { markFailed(t); } } } }); } else { SimpleSlot slot = scheduler.scheduleImmediately(toSchedule); try { deployToSlot(slot); } catch (Throwable t) { try { slot.releaseSlot(); } finally { markFailed(t); } } } return true; } else { // call race, already deployed, or already done return false; } }