private boolean testUberDecision(Configuration conf) { JobID jobID = JobID.forName("job_1234567890000_0001"); JobId jobId = TypeConverter.toYarn(jobID); MRAppMetrics mrAppMetrics = MRAppMetrics.create(); JobImpl job = new JobImpl( jobId, ApplicationAttemptId.newInstance(ApplicationId.newInstance(0, 0), 0), conf, mock(EventHandler.class), null, new JobTokenSecretManager(), new Credentials(), null, null, mrAppMetrics, null, true, null, 0, null, null, null, null); InitTransition initTransition = getInitTransition(2); JobEvent mockJobEvent = mock(JobEvent.class); initTransition.transition(job, mockJobEvent); boolean isUber = job.isUber(); return isUber; }
@Test public void testAbortJobCalledAfterKillingTasks() throws IOException { Configuration conf = new Configuration(); conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir); conf.set(MRJobConfig.MR_AM_COMMITTER_CANCEL_TIMEOUT_MS, "1000"); InlineDispatcher dispatcher = new InlineDispatcher(); dispatcher.init(conf); dispatcher.start(); OutputCommitter committer = Mockito.mock(OutputCommitter.class); CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer); commitHandler.init(conf); commitHandler.start(); JobImpl job = createRunningStubbedJob(conf, dispatcher, 2, null); // Fail one task. This should land the JobImpl in the FAIL_WAIT state job.handle( new JobTaskEvent(MRBuilderUtils.newTaskId(job.getID(), 1, TaskType.MAP), TaskState.FAILED)); // Verify abort job hasn't been called Mockito.verify(committer, Mockito.never()) .abortJob((JobContext) Mockito.any(), (State) Mockito.any()); assertJobState(job, JobStateInternal.FAIL_WAIT); // Verify abortJob is called once and the job failed Mockito.verify(committer, Mockito.timeout(2000).times(1)) .abortJob((JobContext) Mockito.any(), (State) Mockito.any()); assertJobState(job, JobStateInternal.FAILED); dispatcher.stop(); }
@Test(timeout = 20000) public void testRebootedDuringCommit() throws Exception { Configuration conf = new Configuration(); conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir); conf.setInt(MRJobConfig.MR_AM_MAX_ATTEMPTS, 2); AsyncDispatcher dispatcher = new AsyncDispatcher(); dispatcher.init(conf); dispatcher.start(); CyclicBarrier syncBarrier = new CyclicBarrier(2); OutputCommitter committer = new WaitingOutputCommitter(syncBarrier, true); CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer); commitHandler.init(conf); commitHandler.start(); AppContext mockContext = mock(AppContext.class); when(mockContext.isLastAMRetry()).thenReturn(true); when(mockContext.hasSuccessfullyUnregistered()).thenReturn(false); JobImpl job = createRunningStubbedJob(conf, dispatcher, 2, mockContext); completeJobTasks(job); assertJobState(job, JobStateInternal.COMMITTING); syncBarrier.await(); job.handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT)); assertJobState(job, JobStateInternal.REBOOT); // return the external state as ERROR since this is last retry. Assert.assertEquals(JobState.RUNNING, job.getState()); when(mockContext.hasSuccessfullyUnregistered()).thenReturn(true); Assert.assertEquals(JobState.ERROR, job.getState()); dispatcher.stop(); commitHandler.stop(); }
@Test(timeout = 20000) public void testCheckJobCompleteSuccess() throws Exception { Configuration conf = new Configuration(); conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir); AsyncDispatcher dispatcher = new AsyncDispatcher(); dispatcher.init(conf); dispatcher.start(); CyclicBarrier syncBarrier = new CyclicBarrier(2); OutputCommitter committer = new TestingOutputCommitter(syncBarrier, true); CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer); commitHandler.init(conf); commitHandler.start(); JobImpl job = createRunningStubbedJob(conf, dispatcher, 2, null); completeJobTasks(job); assertJobState(job, JobStateInternal.COMMITTING); // let the committer complete and verify the job succeeds syncBarrier.await(); assertJobState(job, JobStateInternal.SUCCEEDED); job.handle(new JobEvent(job.getID(), JobEventType.JOB_TASK_ATTEMPT_COMPLETED)); assertJobState(job, JobStateInternal.SUCCEEDED); job.handle(new JobEvent(job.getID(), JobEventType.JOB_MAP_TASK_RESCHEDULED)); assertJobState(job, JobStateInternal.SUCCEEDED); dispatcher.stop(); commitHandler.stop(); }
private static void assertJobState(JobImpl job, JobStateInternal state) { int timeToWaitMsec = 5 * 1000; while (timeToWaitMsec > 0 && job.getInternalState() != state) { try { Thread.sleep(10); timeToWaitMsec -= 10; } catch (InterruptedException e) { break; } } Assert.assertEquals(state, job.getInternalState()); }
@Override public Job<Void> submitToJobThread( int priority, @NotNull final Runnable action, Consumer<Future> onDoneCallback) { final JobImpl<Void> job = new JobImpl<Void>(priority, false); Callable<Void> callable = new Callable<Void>() { public Void call() throws Exception { try { action.run(); } catch (ProcessCanceledException ignored) { // since it's the only task in the job, nothing to cancel } return null; } }; job.addTask(callable, onDoneCallback); job.schedule(); return job; }
@Test public void testJobNoTasks() { Configuration conf = new Configuration(); conf.setInt(MRJobConfig.NUM_REDUCES, 0); conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir); conf.set(MRJobConfig.WORKFLOW_ID, "testId"); conf.set(MRJobConfig.WORKFLOW_NAME, "testName"); conf.set(MRJobConfig.WORKFLOW_NODE_NAME, "testNodeName"); conf.set(MRJobConfig.WORKFLOW_ADJACENCY_PREFIX_STRING + "key1", "value1"); conf.set(MRJobConfig.WORKFLOW_ADJACENCY_PREFIX_STRING + "key2", "value2"); conf.set(MRJobConfig.WORKFLOW_TAGS, "tag1,tag2"); AsyncDispatcher dispatcher = new AsyncDispatcher(); dispatcher.init(conf); dispatcher.start(); OutputCommitter committer = mock(OutputCommitter.class); CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer); commitHandler.init(conf); commitHandler.start(); JobSubmittedEventHandler jseHandler = new JobSubmittedEventHandler( "testId", "testName", "testNodeName", "\"key2\"=\"value2\" \"key1\"=\"value1\" ", "tag1,tag2"); dispatcher.register(EventType.class, jseHandler); JobImpl job = createStubbedJob(conf, dispatcher, 0, null); job.handle(new JobEvent(job.getID(), JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); job.handle(new JobStartEvent(job.getID())); assertJobState(job, JobStateInternal.SUCCEEDED); dispatcher.stop(); commitHandler.stop(); try { Assert.assertTrue(jseHandler.getAssertValue()); } catch (InterruptedException e) { Assert.fail("Workflow related attributes are not tested properly"); } }
@Test public void testReportDiagnostics() throws Exception { JobID jobID = JobID.forName("job_1234567890000_0001"); JobId jobId = TypeConverter.toYarn(jobID); final String diagMsg = "some diagnostic message"; final JobDiagnosticsUpdateEvent diagUpdateEvent = new JobDiagnosticsUpdateEvent(jobId, diagMsg); MRAppMetrics mrAppMetrics = MRAppMetrics.create(); AppContext mockContext = mock(AppContext.class); when(mockContext.hasSuccessfullyUnregistered()).thenReturn(true); JobImpl job = new JobImpl( jobId, Records.newRecord(ApplicationAttemptId.class), new Configuration(), mock(EventHandler.class), null, mock(JobTokenSecretManager.class), null, new SystemClock(), null, mrAppMetrics, null, true, null, 0, null, mockContext, null, null); job.handle(diagUpdateEvent); String diagnostics = job.getReport().getDiagnostics(); Assert.assertNotNull(diagnostics); Assert.assertTrue(diagnostics.contains(diagMsg)); job = new JobImpl( jobId, Records.newRecord(ApplicationAttemptId.class), new Configuration(), mock(EventHandler.class), null, mock(JobTokenSecretManager.class), null, new SystemClock(), null, mrAppMetrics, null, true, null, 0, null, mockContext, null, null); job.handle(new JobEvent(jobId, JobEventType.JOB_KILL)); job.handle(diagUpdateEvent); diagnostics = job.getReport().getDiagnostics(); Assert.assertNotNull(diagnostics); Assert.assertTrue(diagnostics.contains(diagMsg)); }
@Test(timeout = 20000) public void testKilledDuringFailAbort() throws Exception { Configuration conf = new Configuration(); conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir); AsyncDispatcher dispatcher = new AsyncDispatcher(); dispatcher.init(conf); dispatcher.start(); OutputCommitter committer = new StubbedOutputCommitter() { @Override public void setupJob(JobContext jobContext) throws IOException { throw new IOException("forced failure"); } @Override public synchronized void abortJob(JobContext jobContext, State state) throws IOException { while (!Thread.interrupted()) { try { wait(); } catch (InterruptedException e) { } } } }; CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer); commitHandler.init(conf); commitHandler.start(); JobImpl job = createStubbedJob(conf, dispatcher, 2, null); JobId jobId = job.getID(); job.handle(new JobEvent(jobId, JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); job.handle(new JobStartEvent(jobId)); assertJobState(job, JobStateInternal.FAIL_ABORT); job.handle(new JobEvent(jobId, JobEventType.JOB_KILL)); assertJobState(job, JobStateInternal.KILLED); dispatcher.stop(); commitHandler.stop(); }
@Test(timeout = 20000) public void testKilledDuringCommit() throws Exception { Configuration conf = new Configuration(); conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir); AsyncDispatcher dispatcher = new AsyncDispatcher(); dispatcher.init(conf); dispatcher.start(); CyclicBarrier syncBarrier = new CyclicBarrier(2); OutputCommitter committer = new WaitingOutputCommitter(syncBarrier, true); CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer); commitHandler.init(conf); commitHandler.start(); JobImpl job = createRunningStubbedJob(conf, dispatcher, 2, null); completeJobTasks(job); assertJobState(job, JobStateInternal.COMMITTING); syncBarrier.await(); job.handle(new JobEvent(job.getID(), JobEventType.JOB_KILL)); assertJobState(job, JobStateInternal.KILLED); dispatcher.stop(); commitHandler.stop(); }
/** Enqueue the specified job */ public void addJob(Job job) { if (job == null || !_alive) return; // This does nothing // if (job instanceof JobImpl) // ((JobImpl)job).addedToQueue(); long numReady = 0; boolean alreadyExists = false; boolean dropped = false; // getNext() is now outside the jobLock, is that ok? synchronized (_jobLock) { if (_readyJobs.contains(job)) alreadyExists = true; numReady = _readyJobs.size(); if (!alreadyExists) { // if (_timedJobs.contains(job)) // alreadyExists = true; // Always remove and re-add, since it needs to be // re-sorted in the TreeSet. boolean removed = _timedJobs.remove(job); if (removed && _log.shouldLog(Log.WARN)) _log.warn("Rescheduling job: " + job); } if ((!alreadyExists) && shouldDrop(job, numReady)) { job.dropped(); dropped = true; } else { if (!alreadyExists) { if (job.getTiming().getStartAfter() <= _context.clock().now()) { // don't skew us - its 'start after' its been queued, or later job.getTiming().setStartAfter(_context.clock().now()); if (job instanceof JobImpl) ((JobImpl) job).madeReady(); _readyJobs.offer(job); } else { _timedJobs.add(job); // only notify for _timedJobs, as _readyJobs does not use that lock // only notify if sooner, to reduce contention if (job.getTiming().getStartAfter() < _nextPumperRun) _jobLock.notifyAll(); } } } } _context.statManager().addRateData("jobQueue.readyJobs", numReady, 0); if (dropped) { _context.statManager().addRateData("jobQueue.droppedJobs", 1, 0); _log.logAlways( Log.WARN, "Dropping job due to overload! # ready jobs: " + numReady + ": job = " + job); } }
@Test public void testTransitionsAtFailed() throws IOException { Configuration conf = new Configuration(); AsyncDispatcher dispatcher = new AsyncDispatcher(); dispatcher.init(conf); dispatcher.start(); OutputCommitter committer = mock(OutputCommitter.class); doThrow(new IOException("forcefail")).when(committer).setupJob(any(JobContext.class)); CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer); commitHandler.init(conf); commitHandler.start(); AppContext mockContext = mock(AppContext.class); when(mockContext.hasSuccessfullyUnregistered()).thenReturn(false); JobImpl job = createStubbedJob(conf, dispatcher, 2, mockContext); JobId jobId = job.getID(); job.handle(new JobEvent(jobId, JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); job.handle(new JobStartEvent(jobId)); assertJobState(job, JobStateInternal.FAILED); job.handle(new JobEvent(jobId, JobEventType.JOB_TASK_COMPLETED)); assertJobState(job, JobStateInternal.FAILED); job.handle(new JobEvent(jobId, JobEventType.JOB_TASK_ATTEMPT_COMPLETED)); assertJobState(job, JobStateInternal.FAILED); job.handle(new JobEvent(jobId, JobEventType.JOB_MAP_TASK_RESCHEDULED)); assertJobState(job, JobStateInternal.FAILED); job.handle(new JobEvent(jobId, JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE)); assertJobState(job, JobStateInternal.FAILED); Assert.assertEquals(JobState.RUNNING, job.getState()); when(mockContext.hasSuccessfullyUnregistered()).thenReturn(true); Assert.assertEquals(JobState.FAILED, job.getState()); dispatcher.stop(); commitHandler.stop(); }
private static void completeJobTasks(JobImpl job) { // complete the map tasks and the reduce tasks so we start committing int numMaps = job.getTotalMaps(); for (int i = 0; i < numMaps; ++i) { job.handle( new JobTaskEvent( MRBuilderUtils.newTaskId(job.getID(), 1, TaskType.MAP), TaskState.SUCCEEDED)); Assert.assertEquals(JobState.RUNNING, job.getState()); } int numReduces = job.getTotalReduces(); for (int i = 0; i < numReduces; ++i) { job.handle( new JobTaskEvent( MRBuilderUtils.newTaskId(job.getID(), 1, TaskType.MAP), TaskState.SUCCEEDED)); Assert.assertEquals(JobState.RUNNING, job.getState()); } }
@Test(timeout = 20000) public void testRebootedDuringSetup() throws Exception { Configuration conf = new Configuration(); conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir); AsyncDispatcher dispatcher = new AsyncDispatcher(); dispatcher.init(conf); dispatcher.start(); OutputCommitter committer = new StubbedOutputCommitter() { @Override public synchronized void setupJob(JobContext jobContext) throws IOException { while (!Thread.interrupted()) { try { wait(); } catch (InterruptedException e) { } } } }; CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer); commitHandler.init(conf); commitHandler.start(); AppContext mockContext = mock(AppContext.class); when(mockContext.isLastAMRetry()).thenReturn(false); JobImpl job = createStubbedJob(conf, dispatcher, 2, mockContext); JobId jobId = job.getID(); job.handle(new JobEvent(jobId, JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); job.handle(new JobStartEvent(jobId)); assertJobState(job, JobStateInternal.SETUP); job.handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT)); assertJobState(job, JobStateInternal.REBOOT); // return the external state as RUNNING since otherwise JobClient will // exit when it polls the AM for job state Assert.assertEquals(JobState.RUNNING, job.getState()); dispatcher.stop(); commitHandler.stop(); }
@Test public void testCheckAccess() { // Create two unique users String user1 = System.getProperty("user.name"); String user2 = user1 + "1234"; UserGroupInformation ugi1 = UserGroupInformation.createRemoteUser(user1); UserGroupInformation ugi2 = UserGroupInformation.createRemoteUser(user2); // Create the job JobID jobID = JobID.forName("job_1234567890000_0001"); JobId jobId = TypeConverter.toYarn(jobID); // Setup configuration access only to user1 (owner) Configuration conf1 = new Configuration(); conf1.setBoolean(MRConfig.MR_ACLS_ENABLED, true); conf1.set(MRJobConfig.JOB_ACL_VIEW_JOB, ""); // Verify access JobImpl job1 = new JobImpl( jobId, null, conf1, null, null, null, null, null, null, null, null, true, null, 0, null, null, null, null); Assert.assertTrue(job1.checkAccess(ugi1, JobACL.VIEW_JOB)); Assert.assertFalse(job1.checkAccess(ugi2, JobACL.VIEW_JOB)); // Setup configuration access to the user1 (owner) and user2 Configuration conf2 = new Configuration(); conf2.setBoolean(MRConfig.MR_ACLS_ENABLED, true); conf2.set(MRJobConfig.JOB_ACL_VIEW_JOB, user2); // Verify access JobImpl job2 = new JobImpl( jobId, null, conf2, null, null, null, null, null, null, null, null, true, null, 0, null, null, null, null); Assert.assertTrue(job2.checkAccess(ugi1, JobACL.VIEW_JOB)); Assert.assertTrue(job2.checkAccess(ugi2, JobACL.VIEW_JOB)); // Setup configuration access with security enabled and access to all Configuration conf3 = new Configuration(); conf3.setBoolean(MRConfig.MR_ACLS_ENABLED, true); conf3.set(MRJobConfig.JOB_ACL_VIEW_JOB, "*"); // Verify access JobImpl job3 = new JobImpl( jobId, null, conf3, null, null, null, null, null, null, null, null, true, null, 0, null, null, null, null); Assert.assertTrue(job3.checkAccess(ugi1, JobACL.VIEW_JOB)); Assert.assertTrue(job3.checkAccess(ugi2, JobACL.VIEW_JOB)); // Setup configuration access without security enabled Configuration conf4 = new Configuration(); conf4.setBoolean(MRConfig.MR_ACLS_ENABLED, false); conf4.set(MRJobConfig.JOB_ACL_VIEW_JOB, ""); // Verify access JobImpl job4 = new JobImpl( jobId, null, conf4, null, null, null, null, null, null, null, null, true, null, 0, null, null, null, null); Assert.assertTrue(job4.checkAccess(ugi1, JobACL.VIEW_JOB)); Assert.assertTrue(job4.checkAccess(ugi2, JobACL.VIEW_JOB)); // Setup configuration access without security enabled Configuration conf5 = new Configuration(); conf5.setBoolean(MRConfig.MR_ACLS_ENABLED, true); conf5.set(MRJobConfig.JOB_ACL_VIEW_JOB, ""); // Verify access JobImpl job5 = new JobImpl( jobId, null, conf5, null, null, null, null, null, null, null, null, true, null, 0, null, null, null, null); Assert.assertTrue(job5.checkAccess(ugi1, null)); Assert.assertTrue(job5.checkAccess(ugi2, null)); }
public void run() { try { while (_alive) { long now = _context.clock().now(); long timeToWait = -1; try { synchronized (_jobLock) { Job lastJob = null; long lastTime = Long.MIN_VALUE; for (Iterator<Job> iter = _timedJobs.iterator(); iter.hasNext(); ) { Job j = iter.next(); // find jobs due to start before now long timeLeft = j.getTiming().getStartAfter() - now; if (lastJob != null && lastTime > j.getTiming().getStartAfter()) { _log.error( "Job " + lastJob + " out of order with job " + j + " difference of " + DataHelper.formatDuration(lastTime - j.getTiming().getStartAfter())); } lastJob = j; lastTime = lastJob.getTiming().getStartAfter(); if (timeLeft <= 0) { if (j instanceof JobImpl) ((JobImpl) j).madeReady(); _readyJobs.offer(j); iter.remove(); } else { // if ( (timeToWait <= 0) || (timeLeft < timeToWait) ) // _timedJobs is now a TreeSet, so once we hit one that is // not ready yet, we can break // NOTE: By not going through the whole thing, a single job changing // setStartAfter() to some far-away time, without // calling addJob(), could clog the whole queue forever. // Hopefully nobody does that, and as a backup, we hope // that the TreeSet will eventually resort it from other addJob() calls. timeToWait = timeLeft; // failsafe - remove and re-add, peek at the next job, // break and go around again if (timeToWait > 10 * 1000 && iter.hasNext()) { if (_log.shouldLog(Log.INFO)) _log.info( "Failsafe re-sort job " + j + " with delay " + DataHelper.formatDuration(timeToWait)); iter.remove(); Job nextJob = iter.next(); _timedJobs.add(j); long nextTimeLeft = nextJob.getTiming().getStartAfter() - now; if (timeToWait > nextTimeLeft) { _log.error( "Job " + j + " out of order with job " + nextJob + " difference of " + DataHelper.formatDuration(timeToWait - nextTimeLeft)); timeToWait = Math.max(10, nextTimeLeft); } } break; } } if (timeToWait < 0) timeToWait = 1000; else if (timeToWait < 10) timeToWait = 10; else if (timeToWait > 10 * 1000) timeToWait = 10 * 1000; // if (_log.shouldLog(Log.DEBUG)) // _log.debug("Waiting " + timeToWait + " before rechecking the timed queue"); _nextPumperRun = _context.clock().now() + timeToWait; _jobLock.wait(timeToWait); } // synchronize (_jobLock) } catch (InterruptedException ie) { } } // while (_alive) } catch (Throwable t) { _context.clock().removeUpdateListener(this); if (_log.shouldLog(Log.ERROR)) _log.error("wtf, pumper killed", t); } }