예제 #1
0
  /**
   * Polls every registered job once and runs its rebuild, merge, export, timeout and reset
   * checks.
   *
   * <p>All jobs are polled together, which is somewhat wasteful for the master. Possible
   * improvements: (1) register listeners on the job's properties (Listener pattern), or (2) use
   * the Observer pattern.
   *
   * <p>Merge and export work is handed to {@code eventProcessThreadPool}; the {@code isMerging}
   * and {@code isExporting} flags are claimed with compare-and-set so that at most one worker per
   * job is submitted for each activity.
   */
  protected void mergeAndExportJobs() {
    Iterator<Map.Entry<String, Job>> iter = jobs.entrySet().iterator();
    while (iter.hasNext()) {
      Job job = iter.next().getValue();

      // Rebuild tag 2: rebuild right away and skip every other check for this pass.
      // (The meaning of the tag values is defined by Job and is not visible here.)
      if (job.getRebuildTag() == 2) {
        job.rebuild(0, null, this);
        continue;
      }

      if (!job.getJobTimeOut().get()) {
        // The job has not timed out: merge its task results if needed.
        if (!job.isMerging().get() && job.needMerge()) {
          logger.warn(
              "job "
                  + job.getJobName()
                  + " complete tasks:"
                  + job.getCompletedTaskCount().get()
                  + ", merged tasks :"
                  + job.getMergedTaskCount().get());
          final Job j = job;
          final BlockingQueue<JobMergedResult> branchResultQueue =
              branchResultQueuePool.get(j.getJobName());
          final BlockingQueue<JobTaskResult> jobTaskResultsQueue =
              jobTaskResultsQueuePool.get(j.getJobName());

          // Only the caller that wins the CAS submits the merge worker.
          if (j.isMerging().compareAndSet(false, true)) {
            eventProcessThreadPool.execute(
                new Runnable() {
                  public void run() {
                    try {
                      jobResultMerger.merge(j, branchResultQueue, jobTaskResultsQueue, true);
                    } catch (Throwable e) {
                      // Pass the throwable separately so the stack trace is logged too.
                      logger.error("merge of job " + j.getJobName() + " failed", e);
                    } finally {
                      j.isMerging().set(false);
                    }
                  }
                });
          }
        }
      } else {
        // The job timed out: check whether any thread is still merging into the trunk; if the
        // write lock can be taken, do the final trunk merge and set the merged flag.
        boolean gotIt = job.getTrunkLock().writeLock().tryLock();

        if (gotIt) {
          try {
            if (!job.isMerged().get()) {
              List<Map<String, Map<String, Object>>> mergeResults =
                  new ArrayList<Map<String, Map<String, Object>>>();
              new MergeJobOperation(
                      job, 0, mergeResults, config, branchResultQueuePool.get(job.getJobName()))
                  .run();

              job.isMerged().set(true);
              logger.warn("job is timeout, last merge trunk success!");
            }
          } finally {
            job.getTrunkLock().writeLock().unlock();
          }
        }
      }

      // Export the job's data if needed.
      if (!job.isExporting().get() && job.needExport()) {
        final Job j = job;

        // Only the caller that wins the CAS submits the export worker.
        if (j.isExporting().compareAndSet(false, true)) {
          eventProcessThreadPool.execute(
              new Runnable() {
                public void run() {
                  try {
                    // Runs on a pool thread, but the export itself is still a blocking call.
                    jobExporter.exportReport(j, false);
                    j.isExported().set(true);
                  } catch (Throwable e) {
                    // Pass the throwable separately so the stack trace is logged too.
                    logger.error("export of job " + j.getJobName() + " failed", e);
                  } finally {
                    j.isExporting().set(false);
                  }

                  // Decide whether to export or clean the intermediate (trunk) result; done
                  // after the exporting flag is released so the next round is not held up.
                  exportOrCleanTrunk(j);
                }
              });
        }

        // These rebuild checks run whenever an export was due, whether or not this pass won
        // the CAS above.
        if (job.getRebuildTag() == -1) {
          job.rebuild(0, null, this);
          iter.remove();
          // NOTE(review): the removed job still goes through the timeout and reset checks
          // below in this pass - confirm that is intended.
        }
        if (job.getRebuildTag() == 1) {
          job.rebuild(0, null, this);
        }
      }

      // Check the job's processing time; this sets the job's timeout flag when it has expired.
      job.checkJobTimeOut();

      // Reset the job if it asks for it.
      if (job.needReset()) {
        if (logger.isWarnEnabled()) {
          logger.warn("job " + job.getJobName() + " be reset now.");
        }

        // Comma-separated statistics record for the cluster log, written before the reset.
        StringBuilder sb =
            new StringBuilder(ReportUtil.MASTER_LOG)
                .append(",")
                .append(System.currentTimeMillis())
                .append(",");
        sb.append(job.getEpoch())
            .append(",")
            .append(job.getJobName())
            .append(",")
            .append(System.currentTimeMillis() - job.getStartTime())
            .append(",")
            .append(job.getJobMergeTime().get())
            .append(",")
            .append(job.getJobExportTime())
            .append(",")
            .append(job.getTaskCount())
            .append(",")
            .append(job.getCompletedTaskCount().get())
            .append(",")
            .append(job.getMergedTaskCount().get())
            .append(",")
            .append(job.getJobMergeBranchCount().get());
        ReportUtil.clusterLog(sb.toString());

        job.reset(this);

        if (logger.isInfoEnabled()) {
          sb =
              new StringBuilder("jobManager:{jobs:")
                  .append(jobs.size())
                  .append(",jobTaskPool:")
                  .append(jobTaskPool.size());
          sb.append(",statusPool:")
              .append(statusPool.size())
              .append(",undoTasks:")
              .append(undoTaskQueue.size())
              .append("}");
          logger.info(sb.toString());
        }

        // Re-publish the status of every task of the job into the status pool after the reset.
        List<JobTask> tasks = job.getJobTasks();

        for (JobTask task : tasks) {
          statusPool.put(task.getTaskId(), task.getStatus());
        }
      }
    }
  }
예제 #2
0
  /**
   * Accepts a batch of task results reported by a slave, queues it for merging, marks the
   * covered tasks as done, and acknowledges the slave.
   *
   * <p>Task assignment and result submission are handled on a single thread, so the status pool
   * needs no extra concurrency control here; the more expensive send operations are left to the
   * multi-threaded ServerConnector.
   *
   * <p>A result is discarded (but still acknowledged with "success") when its first task id is
   * unknown to {@code jobTaskPool}, when its job epoch is older than the pooled task's epoch, or
   * when it is still newer than the pooled task's epoch after waiting for the job to be reset.
   *
   * @param jobResponseEvent the event carrying the slave's {@code JobTaskResult}, the request
   *     sequence and the channel to answer on
   */
  @Override
  public void addTaskResultToQueue(SendResultsRequestEvent jobResponseEvent) {

    JobTaskResult jobTaskResult = jobResponseEvent.getJobTaskResult();

    if (jobTaskResult.getTaskIds() != null && jobTaskResult.getTaskIds().size() > 0) {
      // The first task of the batch identifies the job and epoch the whole result belongs to.
      String firstTaskId = jobTaskResult.getTaskIds().get(0);
      JobTask firstTask = jobTaskPool.get(firstTaskId);

      if (firstTask == null) {
        // Without the pooled task neither the job nor its epoch can be determined. Discard the
        // result and acknowledge it instead of dereferencing null below.
        logger.error("jobTask is null " + firstTaskId);
        masterNode.echoSendJobTaskResults(
            jobResponseEvent.getSequence(), "success", jobResponseEvent.getChannel());
        return;
      }

      // Detect stale results by comparing the job epoch of the result with the pooled task's.
      // Future extension: if the current epoch < the result's epoch, this node may be a
      // subordinate master responsible for reduce that cannot keep up.
      if (jobTaskResult.getJobEpoch() != firstTask.getJobEpoch()) {

        if (jobTaskResult.getJobEpoch() < firstTask.getJobEpoch()) {
          logger.error(
              "old task result will be discard! job:"
                  + firstTask.getJobName()
                  + ",epoch:"
                  + jobTaskResult.getJobEpoch()
                  + ",slave:"
                  + jobResponseEvent.getChannel());
          masterNode.echoSendJobTaskResults(
              jobResponseEvent.getSequence(), "success", jobResponseEvent.getChannel());
          return;
        } else {
          // Allow some tolerance: wait for the job to be reset, currently up to 15000 ms.
          jobs.get(firstTask.getJobName()).blockToResetJob(15000);

          // Look the task up again after the wait, as the original code did.
          firstTask = jobTaskPool.get(firstTaskId);
          if (firstTask == null) {
            logger.error("jobTask is null " + firstTaskId);
            masterNode.echoSendJobTaskResults(
                jobResponseEvent.getSequence(), "success", jobResponseEvent.getChannel());
            return;
          }

          if (jobTaskResult.getJobEpoch() > firstTask.getJobEpoch()) {
            logger.error("otherMaster can't merge in time!job:" + firstTask.getJobName());
            masterNode.echoSendJobTaskResults(
                jobResponseEvent.getSequence(), "success", jobResponseEvent.getChannel());
            return;
          }
        }
      }

      if (logger.isWarnEnabled()) {
        StringBuilder ts =
            new StringBuilder("Receive slave analysis result, jobTaskIds : ")
                .append(jobTaskResult.toString())
                .append(", ")
                .append(jobTaskResult.getTaskIds().size());
        logger.warn(ts.toString());
      }

      // Enqueue the result first to guard against a low-probability multi-thread race.
      jobTaskResultsQueuePool.get(firstTask.getJobName()).offer(jobTaskResult);

      for (int i = 0; i < jobTaskResult.getTaskIds().size(); i++) {
        String taskId = jobTaskResult.getTaskIds().get(i);
        JobTask jobTask = jobTaskPool.get(taskId);

        if (jobTask == null) {
          logger.error(
              new StringBuilder("taskId :").append(taskId).append("not exist!").toString());
          continue;
        }

        Job job = jobs.get(jobTask.getJobName());
        if (job == null) {
          logger.error(
              new StringBuilder("job :")
                  .append(jobTask.getJobName())
                  .append("not exist!")
                  .toString());
          continue;
        }

        // Only the DOING -> DONE or UNDO -> DONE transition counts the task as completed, so a
        // task that is already DONE is not counted twice.
        if (statusPool.replace(taskId, JobTaskStatus.DOING, JobTaskStatus.DONE)
            || statusPool.replace(taskId, JobTaskStatus.UNDO, JobTaskStatus.DONE)) {
          logger.info("task " + taskId + " of job " + job.getJobName() + " done");
          jobTask.setStatus(JobTaskStatus.DONE);
          jobTask.setEndTime(System.currentTimeMillis());
          jobTask.setLastMergedEpoch(job.getEpoch().get());
          job.getCompletedTaskCount().incrementAndGet();
        }

        // Emit a comma-separated cluster-log record for this task's execution result.
        StringBuilder log =
            new StringBuilder(ReportUtil.SLAVE_LOG)
                .append(",")
                .append(System.currentTimeMillis())
                .append(",")
                .append(job.getEpoch())
                .append(",");
        log.append(jobTask.getJobName())
            .append(",")
            .append(jobTask.getTaskId())
            .append(",")
            .append(jobTask.getRecycleCounter().get())
            .append(",")
            .append(jobTaskResult.getSlaveIp())
            .append(",")
            .append(jobTaskResult.getEfficiency())
            .append(",");

        JobTaskExecuteInfo executeInfo =
            jobTaskResult.getTaskExecuteInfos().get(jobTask.getTaskId());

        if (executeInfo != null) {
          log.append(executeInfo.getAnalysisConsume())
              .append(",")
              .append(executeInfo.getJobDataSize())
              .append(",")
              .append(executeInfo.getTotalLine())
              .append(",")
              .append(executeInfo.getErrorLine())
              .append(",")
              .append(executeInfo.getEmptyLine());
        } else {
          logger.error(
              new StringBuilder()
                  .append("taskId : ")
                  .append(jobTask.getTaskId())
                  .append(" executeInfo is null!")
                  .toString());
        }

        ReportUtil.clusterLog(log.toString());
      }
    }

    // Optionally send the response asynchronously to reduce event-processing latency in the
    // job manager.
    if (config.isUseAsynModeToSendResponse()) {
      final String sequence = jobResponseEvent.getSequence();
      final Object channel = jobResponseEvent.getChannel();

      eventProcessThreadPool.execute(
          new Runnable() {
            public void run() {
              try {
                masterNode.echoSendJobTaskResults(sequence, "success", channel);
              } catch (Throwable e) {
                // Pass the throwable separately so the stack trace is logged too.
                logger.error("failed to send task result response, sequence:" + sequence, e);
              }
            }
          });
    } else {
      masterNode.echoSendJobTaskResults(
          jobResponseEvent.getSequence(), "success", jobResponseEvent.getChannel());
    }
  }