@Override public synchronized void checkJobStatus() throws AnalysisException { // 通过外部事件激发重新载入配置 if (jobBuilder.isNeedRebuild()) { if (logger.isInfoEnabled()) { logger.info("check job status need to rebuild"); } jobs = jobBuilder.rebuild(jobs); if (jobs == null || (jobs != null && jobs.size() == 0)) throw new AnalysisException("jobs should not be empty!"); } checkTaskStatus(); mergeAndExportJobs(); // 任务全部完成并且没有新加任务的情况下,休息1s for (Job job : jobs.values()) { if (!job.isExported().get() || job.getRebuildTag() == 2) { return; } else { try { Thread.sleep(1000); } catch (InterruptedException e) { logger.error(e); } } } }
// 做合并和导出,重置任务的检查操作 // 所有任务一起来轮询,对Master来讲,有点资源浪费 // 可以通过以下几种方式改进: // 1、针对job的属性设置监听器,Listener模式 // 2、使用Observer模式 protected void mergeAndExportJobs() { Iterator<Map.Entry<String, Job>> iter = jobs.entrySet().iterator(); while (iter.hasNext()) { Job job = iter.next().getValue(); if (job.getRebuildTag() == 2) { job.rebuild(0, null, this); continue; } if (!job.getJobTimeOut().get()) { // 需要合并该job的task if (!job.isMerging().get() && job.needMerge()) { logger.warn( "job " + job.getJobName() + " complete tasks:" + job.getCompletedTaskCount().get() + ", merged tasks :" + job.getMergedTaskCount().get()); final Job j = job; final BlockingQueue<JobMergedResult> branchResultQueue = branchResultQueuePool.get(j.getJobName()); final BlockingQueue<JobTaskResult> jobTaskResultsQueue = jobTaskResultsQueuePool.get(j.getJobName()); if (j.isMerging().compareAndSet(false, true)) eventProcessThreadPool.execute( new Runnable() { public void run() { try { jobResultMerger.merge(j, branchResultQueue, jobTaskResultsQueue, true); } catch (Throwable e) { logger.error(e); } finally { j.isMerging().set(false); } } }); } } else { // 判断是否还有和主干合并的线程,如果没有可以设置完成标识 boolean gotIt = job.getTrunkLock().writeLock().tryLock(); if (gotIt) { try { if (!job.isMerged().get()) { List<Map<String, Map<String, Object>>> mergeResults = new ArrayList<Map<String, Map<String, Object>>>(); new MergeJobOperation( job, 0, mergeResults, config, branchResultQueuePool.get(job.getJobName())) .run(); job.isMerged().set(true); logger.warn("job is timeout, last merge trunk success!"); } } finally { job.getTrunkLock().writeLock().unlock(); } } } // 需要导出该job的数据 if (!job.isExporting().get() && job.needExport()) { final Job j = job; if (j.isExporting().compareAndSet(false, true)) eventProcessThreadPool.execute( new Runnable() { public void run() { try { // 虽然是多线程,但还是阻塞模式来做 jobExporter.exportReport(j, false); j.isExported().set(true); } catch (Throwable e) { logger.error(e); } finally { j.isExporting().set(false); } // 判断是否需要开始导出中间结果,放在外部不妨碍下一次的处理 exportOrCleanTrunk(j); } }); if (job.getRebuildTag() == -1) { job.rebuild(0, null, this); iter.remove(); } if (job.getRebuildTag() == 1) { job.rebuild(0, null, this); } } // 做一次任务处理时间判断,如果超时将设置job的超时状态位置 job.checkJobTimeOut(); // 任务是否需要被重置 if (job.needReset()) { if (logger.isWarnEnabled()) logger.warn("job " + job.getJobName() + " be reset now."); StringBuilder sb = new StringBuilder(ReportUtil.MASTER_LOG) .append(",") .append(System.currentTimeMillis()) .append(","); sb.append(job.getEpoch()) .append(",") .append(job.getJobName()) .append(",") .append(System.currentTimeMillis() - job.getStartTime()) .append(",") .append(job.getJobMergeTime().get()) .append(",") .append(job.getJobExportTime()) .append(",") .append(job.getTaskCount()) .append(",") .append(job.getCompletedTaskCount().get()) .append(",") .append(job.getMergedTaskCount().get()) .append(",") .append(job.getJobMergeBranchCount().get()); ReportUtil.clusterLog(sb.toString()); job.reset(this); if (logger.isInfoEnabled()) { sb = new StringBuilder("jobManager:{jobs:") .append(jobs.size()) .append(",jobTaskPool:" + jobTaskPool.size()); sb.append(",statusPool:") .append(statusPool.size()) .append(",undoTasks:") .append(undoTaskQueue.size()) .append("}"); logger.info(sb.toString()); } List<JobTask> tasks = job.getJobTasks(); for (JobTask task : tasks) { statusPool.put(task.getTaskId(), task.getStatus()); } } } }