@Override public void init() throws AnalysisException { // 获得任务数量 jobBuilder.setConfig(config); jobExporter.setConfig(config); jobResultMerger.setConfig(config); jobBuilder.init(); jobExporter.init(); jobResultMerger.init(); jobs = jobBuilder.build(); for (Job job : jobs.values()) { job.reset(null); } if (jobs == null || (jobs != null && jobs.size() == 0)) throw new AnalysisException("jobs should not be empty!"); jobTaskPool = new ConcurrentHashMap<String, JobTask>(); undoTaskQueue = new LinkedBlockingDeque<JobTask>(); statusPool = new ConcurrentHashMap<String, JobTaskStatus>(); jobTaskResultsQueuePool = new HashMap<String, BlockingQueue<JobTaskResult>>(); branchResultQueuePool = new HashMap<String, BlockingQueue<JobMergedResult>>(); for (String jobName : jobs.keySet()) { jobTaskResultsQueuePool.put(jobName, new LinkedBlockingQueue<JobTaskResult>()); branchResultQueuePool.put(jobName, new LinkedBlockingQueue<JobMergedResult>()); } eventProcessThreadPool = new ThreadPoolExecutor( this.config.getMaxJobEventWorker(), this.config.getMaxJobEventWorker(), 0, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("jobManagerEventProcess_worker")); masterDataRecoverWorker = new MasterDataRecoverWorker( config.getMasterName(), config.getTempStoreDataDir(), jobs, this.config); masterDataRecoverWorker.start(); addJobsToPool(); if (logger.isInfoEnabled()) logger.info("jobManager init end, MaxJobEventWorker size : " + config.getMaxJobEventWorker()); }
// 分配任务和结果提交处理由于是单线程处理, // 因此本身不用做状态池并发控制,将消耗较多的发送操作交给ServerConnector多线程操作 @Override public void addTaskResultToQueue(SendResultsRequestEvent jobResponseEvent) { JobTaskResult jobTaskResult = jobResponseEvent.getJobTaskResult(); if (jobTaskResult.getTaskIds() != null && jobTaskResult.getTaskIds().size() > 0) { // 判断是否是过期的一些老任务数据,根据task和taskresult的createtime来判断 // 以后要扩展成为如果发现当前的epoch < 结果的epoch,表明这台可能是从属的master,负责reduce,但是速度跟不上了 if (jobTaskPool.get(jobTaskResult.getTaskIds().get(0)) == null) { logger.error("jobTask is null " + jobTaskResult.getTaskIds().get(0)); } if (jobTaskResult.getJobEpoch() != jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobEpoch()) { if (jobTaskResult.getJobEpoch() < jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobEpoch()) { logger.error( "old task result will be discard! job:" + jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobName() + ",epoch:" + jobTaskResult.getJobEpoch() + ",slave:" + jobResponseEvent.getChannel()); masterNode.echoSendJobTaskResults( jobResponseEvent.getSequence(), "success", jobResponseEvent.getChannel()); return; } else { // 给一定的容忍时间,暂时定为5秒 jobs.get(jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobName()) .blockToResetJob(15000); if (jobTaskResult.getJobEpoch() > jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobEpoch()) { logger.error( "otherMaster can't merge in time!job:" + jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobName()); masterNode.echoSendJobTaskResults( jobResponseEvent.getSequence(), "success", jobResponseEvent.getChannel()); return; } } } if (logger.isWarnEnabled()) { StringBuilder ts = new StringBuilder("Receive slave analysis result, jobTaskIds : ") .append(jobTaskResult.toString()) .append(", ") .append(jobTaskResult.getTaskIds().size()); logger.warn(ts.toString()); } // 先放入队列,防止小概率多线程并发问题 jobTaskResultsQueuePool .get(jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobName()) .offer(jobTaskResult); for (int i = 0; i < jobTaskResult.getTaskIds().size(); i++) { String taskId = jobTaskResult.getTaskIds().get(i); JobTask jobTask = jobTaskPool.get(taskId); if (jobTask == null) { logger.error( new StringBuilder("taskId :").append(taskId).append("not exist!").toString()); continue; } Job job = jobs.get(jobTask.getJobName()); if (job == null) { logger.error( new StringBuilder("job :") .append(jobTask.getJobName()) .append("not exist!") .toString()); continue; } if (statusPool.replace(taskId, JobTaskStatus.DOING, JobTaskStatus.DONE) || statusPool.replace(taskId, JobTaskStatus.UNDO, JobTaskStatus.DONE)) { logger.info("task " + jobTask.getJobName() + " of job " + job.getJobName() + " done"); jobTask.setStatus(JobTaskStatus.DONE); jobTask.setEndTime(System.currentTimeMillis()); jobTask.setLastMergedEpoch(job.getEpoch().get()); job.getCompletedTaskCount().incrementAndGet(); } // 对jobTask的执行结果打点 StringBuilder log = new StringBuilder(ReportUtil.SLAVE_LOG) .append(",") .append(System.currentTimeMillis()) .append(",") .append(job.getEpoch()) .append(","); log.append(jobTask.getJobName()) .append(",") .append(jobTask.getTaskId()) .append(",") .append(jobTask.getRecycleCounter().get()) .append(",") .append(jobTaskResult.getSlaveIp()) .append(",") .append(jobTaskResult.getEfficiency()) .append(","); JobTaskExecuteInfo executeInfo = jobTaskResult.getTaskExecuteInfos().get(jobTask.getTaskId()); if (executeInfo != null) log.append(executeInfo.getAnalysisConsume()) .append(",") .append(executeInfo.getJobDataSize()) .append(",") .append(executeInfo.getTotalLine()) .append(",") .append(executeInfo.getErrorLine()) .append(",") .append(executeInfo.getEmptyLine()); else logger.error( new StringBuilder() .append("taskId : ") .append(jobTask.getTaskId()) .append(" executeInfo is null!") .toString()); ReportUtil.clusterLog(log.toString()); } } // 是否需要用异步方式发送,减少对jobManager事件处理延时 if (config.isUseAsynModeToSendResponse()) { final String sequence = jobResponseEvent.getSequence(); final Object channel = jobResponseEvent.getChannel(); eventProcessThreadPool.execute( new Runnable() { public void run() { try { masterNode.echoSendJobTaskResults(sequence, "success", channel); } catch (Throwable e) { logger.error(e); } } }); } else masterNode.echoSendJobTaskResults( jobResponseEvent.getSequence(), "success", jobResponseEvent.getChannel()); }
/** * 在导出数据以后,判断是否需要清空主干,是否需要导出主干 * * @param job */ protected void exportOrCleanTrunk(Job job) { boolean needToSetJobResultNull = false; // 判断是否到了报表的有效时间段,支持小时,日,月三种方式 if (job.getJobConfig().getReportPeriodDefine().equals(AnalysisConstants.REPORT_PERIOD_DAY)) { Calendar calendar = Calendar.getInstance(); int now = calendar.get(Calendar.DAY_OF_MONTH); if (job.getReportPeriodFlag() != -1 && now != job.getReportPeriodFlag()) needToSetJobResultNull = true; job.setReportPeriodFlag(now); } else { if (job.getJobConfig().getReportPeriodDefine().equals(AnalysisConstants.REPORT_PERIOD_HOUR)) { Calendar calendar = Calendar.getInstance(); int now = calendar.get(Calendar.HOUR_OF_DAY); if (job.getReportPeriodFlag() != -1 && now != job.getReportPeriodFlag()) needToSetJobResultNull = true; job.setReportPeriodFlag(now); } else { if (job.getJobConfig() .getReportPeriodDefine() .equals(AnalysisConstants.REPORT_PERIOD_MONTH)) { Calendar calendar = Calendar.getInstance(); int now = calendar.get(Calendar.MONTH); if (job.getReportPeriodFlag() != -1 && now != job.getReportPeriodFlag()) needToSetJobResultNull = true; job.setReportPeriodFlag(now); } } } if (needToSetJobResultNull) { job.setJobResult(null); job.getEpoch().set(0); // 删除临时文件,防止重复载入使得清空不生效 if (config.getSaveTmpResultToFile()) { JobDataOperation jobDataOperation = new JobDataOperation(job, AnalysisConstants.JOBMANAGER_EVENT_DEL_DATAFILE, this.config); jobDataOperation.run(); } if (logger.isWarnEnabled()) logger.warn("job " + job.getJobName() + " report data be reset.it's a new start. "); } // 清除主干数据,到时候自然会载入 if (config.getSaveTmpResultToFile() && (job.getJobConfig().getSaveTmpResultToFile() == null || job.getJobConfig().getSaveTmpResultToFile())) { logger.warn("@disk2Mem mode: start " + job.getJobName() + " store trunk to disk now ."); JobDataOperation jobDataOperation = new JobDataOperation( job, AnalysisConstants.JOBMANAGER_EVENT_SETNULL_EXPORTDATA, this.config); jobDataOperation.run(); } else { if (job.getLastExportTime() == 0 || System.currentTimeMillis() - job.getLastExportTime() >= config.getExportInterval() || stopped) { logger.warn("export job: " + job.getJobName() + " trunk to disk."); JobDataOperation jobDataOperation = new JobDataOperation(job, AnalysisConstants.JOBMANAGER_EVENT_EXPORTDATA, this.config); jobDataOperation.run(); } } }
// 分配任务和结果提交处理由于是单线程处理, // 因此本身不用做状态池并发控制,将消耗较多的发送操作交给ServerConnector多线程操作 @Override public void getUnDoJobTasks(GetTaskRequestEvent requestEvent) { String jobName = requestEvent.getJobName(); int jobCount = requestEvent.getRequestJobCount(); final List<JobTask> jobTasks = new ArrayList<JobTask>(); // 如果关闭,则直接返回一个空的JobTask的list给slave if (this.stopped) { masterNode.echoGetJobTasks(requestEvent.getSequence(), jobTasks, requestEvent.getChannel()); return; } // 指定job if (jobName != null && jobs.containsKey(jobName)) { Job job = jobs.get(jobName); List<JobTask> tasks = job.getJobTasks(); for (JobTask jobTask : tasks) { if (jobTask.getStatus().equals(JobTaskStatus.UNDO)) { if (statusPool.replace(jobTask.getTaskId(), JobTaskStatus.UNDO, JobTaskStatus.DOING)) { this.allocateTask(jobTask); jobTasks.add(jobTask); if (jobTasks.size() == jobCount) break; } } } } else { Iterator<JobTask> taskIter = undoTaskQueue.iterator(); while (taskIter.hasNext()) { // String taskId = taskIds.next(); // JobTask jobTask = jobTaskPool.get(taskId); JobTask jobTask = taskIter.next(); if (!jobTaskPool.keySet().contains(jobTask.getTaskId()) || jobs.get(jobTask.getJobName()).getEpoch().get() > jobTask.getJobEpoch() || jobs.get(jobTask.getJobName()).getJobTimeOut().get()) { taskIter.remove(); continue; } if (statusPool.get(jobTask.getTaskId()).equals(JobTaskStatus.UNDO)) { if (statusPool.replace(jobTask.getTaskId(), JobTaskStatus.UNDO, JobTaskStatus.DOING)) { this.allocateTask(jobTask); jobTasks.add(jobTask); taskIter.remove(); if (jobTasks.size() >= jobCount) break; } } else taskIter.remove(); } } // 是否需要用异步方式发送,减少对jobManager事件处理延时 if (config.isUseAsynModeToSendResponse()) { final String sequence = requestEvent.getSequence(); final Object channel = requestEvent.getChannel(); // 由于该操作比较慢,开线程执行,保证速度 eventProcessThreadPool.execute( new Runnable() { public void run() { try { masterNode.echoGetJobTasks(sequence, jobTasks, channel); } catch (Throwable e) { logger.error(e); } } }); } else masterNode.echoGetJobTasks(requestEvent.getSequence(), jobTasks, requestEvent.getChannel()); }