// 分配任务和结果提交处理由于是单线程处理, // 因此本身不用做状态池并发控制,将消耗较多的发送操作交给ServerConnector多线程操作 @Override public void addTaskResultToQueue(SendResultsRequestEvent jobResponseEvent) { JobTaskResult jobTaskResult = jobResponseEvent.getJobTaskResult(); if (jobTaskResult.getTaskIds() != null && jobTaskResult.getTaskIds().size() > 0) { // 判断是否是过期的一些老任务数据,根据task和taskresult的createtime来判断 // 以后要扩展成为如果发现当前的epoch < 结果的epoch,表明这台可能是从属的master,负责reduce,但是速度跟不上了 if (jobTaskPool.get(jobTaskResult.getTaskIds().get(0)) == null) { logger.error("jobTask is null " + jobTaskResult.getTaskIds().get(0)); } if (jobTaskResult.getJobEpoch() != jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobEpoch()) { if (jobTaskResult.getJobEpoch() < jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobEpoch()) { logger.error( "old task result will be discard! job:" + jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobName() + ",epoch:" + jobTaskResult.getJobEpoch() + ",slave:" + jobResponseEvent.getChannel()); masterNode.echoSendJobTaskResults( jobResponseEvent.getSequence(), "success", jobResponseEvent.getChannel()); return; } else { // 给一定的容忍时间,暂时定为5秒 jobs.get(jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobName()) .blockToResetJob(15000); if (jobTaskResult.getJobEpoch() > jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobEpoch()) { logger.error( "otherMaster can't merge in time!job:" + jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobName()); masterNode.echoSendJobTaskResults( jobResponseEvent.getSequence(), "success", jobResponseEvent.getChannel()); return; } } } if (logger.isWarnEnabled()) { StringBuilder ts = new StringBuilder("Receive slave analysis result, jobTaskIds : ") .append(jobTaskResult.toString()) .append(", ") .append(jobTaskResult.getTaskIds().size()); logger.warn(ts.toString()); } // 先放入队列,防止小概率多线程并发问题 jobTaskResultsQueuePool .get(jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobName()) .offer(jobTaskResult); for (int i = 0; i < jobTaskResult.getTaskIds().size(); i++) { String taskId = jobTaskResult.getTaskIds().get(i); JobTask jobTask = jobTaskPool.get(taskId); if (jobTask == null) { logger.error( new StringBuilder("taskId :").append(taskId).append("not exist!").toString()); continue; } Job job = jobs.get(jobTask.getJobName()); if (job == null) { logger.error( new StringBuilder("job :") .append(jobTask.getJobName()) .append("not exist!") .toString()); continue; } if (statusPool.replace(taskId, JobTaskStatus.DOING, JobTaskStatus.DONE) || statusPool.replace(taskId, JobTaskStatus.UNDO, JobTaskStatus.DONE)) { logger.info("task " + jobTask.getJobName() + " of job " + job.getJobName() + " done"); jobTask.setStatus(JobTaskStatus.DONE); jobTask.setEndTime(System.currentTimeMillis()); jobTask.setLastMergedEpoch(job.getEpoch().get()); job.getCompletedTaskCount().incrementAndGet(); } // 对jobTask的执行结果打点 StringBuilder log = new StringBuilder(ReportUtil.SLAVE_LOG) .append(",") .append(System.currentTimeMillis()) .append(",") .append(job.getEpoch()) .append(","); log.append(jobTask.getJobName()) .append(",") .append(jobTask.getTaskId()) .append(",") .append(jobTask.getRecycleCounter().get()) .append(",") .append(jobTaskResult.getSlaveIp()) .append(",") .append(jobTaskResult.getEfficiency()) .append(","); JobTaskExecuteInfo executeInfo = jobTaskResult.getTaskExecuteInfos().get(jobTask.getTaskId()); if (executeInfo != null) log.append(executeInfo.getAnalysisConsume()) .append(",") .append(executeInfo.getJobDataSize()) .append(",") .append(executeInfo.getTotalLine()) .append(",") .append(executeInfo.getErrorLine()) .append(",") .append(executeInfo.getEmptyLine()); else logger.error( new StringBuilder() .append("taskId : ") .append(jobTask.getTaskId()) .append(" executeInfo is null!") .toString()); ReportUtil.clusterLog(log.toString()); } } // 是否需要用异步方式发送,减少对jobManager事件处理延时 if (config.isUseAsynModeToSendResponse()) { final String sequence = jobResponseEvent.getSequence(); final Object channel = jobResponseEvent.getChannel(); eventProcessThreadPool.execute( new Runnable() { public void run() { try { masterNode.echoSendJobTaskResults(sequence, "success", channel); } catch (Throwable e) { logger.error(e); } } }); } else masterNode.echoSendJobTaskResults( jobResponseEvent.getSequence(), "success", jobResponseEvent.getChannel()); }
// 分配任务和结果提交处理由于是单线程处理, // 因此本身不用做状态池并发控制,将消耗较多的发送操作交给ServerConnector多线程操作 @Override public void getUnDoJobTasks(GetTaskRequestEvent requestEvent) { String jobName = requestEvent.getJobName(); int jobCount = requestEvent.getRequestJobCount(); final List<JobTask> jobTasks = new ArrayList<JobTask>(); // 如果关闭,则直接返回一个空的JobTask的list给slave if (this.stopped) { masterNode.echoGetJobTasks(requestEvent.getSequence(), jobTasks, requestEvent.getChannel()); return; } // 指定job if (jobName != null && jobs.containsKey(jobName)) { Job job = jobs.get(jobName); List<JobTask> tasks = job.getJobTasks(); for (JobTask jobTask : tasks) { if (jobTask.getStatus().equals(JobTaskStatus.UNDO)) { if (statusPool.replace(jobTask.getTaskId(), JobTaskStatus.UNDO, JobTaskStatus.DOING)) { this.allocateTask(jobTask); jobTasks.add(jobTask); if (jobTasks.size() == jobCount) break; } } } } else { Iterator<JobTask> taskIter = undoTaskQueue.iterator(); while (taskIter.hasNext()) { // String taskId = taskIds.next(); // JobTask jobTask = jobTaskPool.get(taskId); JobTask jobTask = taskIter.next(); if (!jobTaskPool.keySet().contains(jobTask.getTaskId()) || jobs.get(jobTask.getJobName()).getEpoch().get() > jobTask.getJobEpoch() || jobs.get(jobTask.getJobName()).getJobTimeOut().get()) { taskIter.remove(); continue; } if (statusPool.get(jobTask.getTaskId()).equals(JobTaskStatus.UNDO)) { if (statusPool.replace(jobTask.getTaskId(), JobTaskStatus.UNDO, JobTaskStatus.DOING)) { this.allocateTask(jobTask); jobTasks.add(jobTask); taskIter.remove(); if (jobTasks.size() >= jobCount) break; } } else taskIter.remove(); } } // 是否需要用异步方式发送,减少对jobManager事件处理延时 if (config.isUseAsynModeToSendResponse()) { final String sequence = requestEvent.getSequence(); final Object channel = requestEvent.getChannel(); // 由于该操作比较慢,开线程执行,保证速度 eventProcessThreadPool.execute( new Runnable() { public void run() { try { masterNode.echoGetJobTasks(sequence, jobTasks, channel); } catch (Throwable e) { logger.error(e); } } }); } else masterNode.echoGetJobTasks(requestEvent.getSequence(), jobTasks, requestEvent.getChannel()); }