コード例 #1
0
 private void analyzeStragglers(DagInfo dag) {
   long dagStartTime = dag.getStartTime();
   long dagTime = dag.getFinishTime() - dagStartTime;
   long totalAttemptCriticalTime = 0;
   for (int i = 0; i < criticalPath.size(); ++i) {
     CriticalPathStep step = criticalPath.get(i);
     totalAttemptCriticalTime += (step.stopCriticalPathTime - step.startCriticalPathTime);
     TaskAttemptInfo attempt = step.attempt;
     if (step.getType() == EntityType.ATTEMPT) {
       // analyze execution overhead
       if (attempt.getLastDataEvents().size() > 1) {
         // there were read errors. that could have delayed the attempt. ignore this
         continue;
       }
       long avgExecutionTime = attempt.getTaskInfo().getVertexInfo().getAvgExecutionTimeInterval();
       if (avgExecutionTime <= 0) {
         continue;
       }
       if (avgExecutionTime * 1.25 < attempt.getExecutionTimeInterval()) {
         step.notes.add(
             "Potential straggler. Execution time "
                 + SVGUtils.getTimeStr(attempt.getExecutionTimeInterval())
                 + " compared to vertex average of "
                 + SVGUtils.getTimeStr(avgExecutionTime));
       }
     }
   }
   System.out.println(
       "DAG time taken: "
           + dagTime
           + " TotalAttemptTime: "
           + totalAttemptCriticalTime
           + " DAG finish time: "
           + dag.getFinishTime()
           + " DAG start time: "
           + dagStartTime);
 }
コード例 #2
0
  private void createCriticalPath(
      DagInfo dagInfo,
      TaskAttemptInfo lastAttempt,
      long lastAttemptFinishTime,
      Map<String, TaskAttemptInfo> attempts) {
    List<CriticalPathStep> tempCP = Lists.newLinkedList();
    if (lastAttempt != null) {
      TaskAttemptInfo currentAttempt = lastAttempt;
      CriticalPathStep currentStep = new CriticalPathStep(currentAttempt, EntityType.DAG_COMMIT);
      long currentAttemptStopCriticalPathTime = lastAttemptFinishTime;

      // add the commit step
      currentStep.stopCriticalPathTime = dagInfo.getFinishTime();
      currentStep.startCriticalPathTime = currentAttemptStopCriticalPathTime;
      currentStep.reason = CriticalPathDependency.COMMIT_DEPENDENCY;
      tempCP.add(currentStep);

      while (true) {
        Preconditions.checkState(currentAttempt != null);
        Preconditions.checkState(currentAttemptStopCriticalPathTime > 0);
        System.out.println(
            "Step: " + tempCP.size() + " Attempt: " + currentAttempt.getTaskAttemptId());

        currentStep = new CriticalPathStep(currentAttempt, EntityType.ATTEMPT);
        currentStep.stopCriticalPathTime = currentAttemptStopCriticalPathTime;

        // consider the last data event seen immediately preceding the current critical path
        // stop time for this attempt
        long currentStepLastDataEventTime = 0;
        String currentStepLastDataTA = null;
        DataDependencyEvent item =
            currentAttempt.getLastDataEventInfo(currentStep.stopCriticalPathTime);
        if (item != null) {
          currentStepLastDataEventTime = item.getTimestamp();
          currentStepLastDataTA = item.getTaskAttemptId();
        }

        // sanity check
        for (CriticalPathStep previousStep : tempCP) {
          if (previousStep.type == EntityType.ATTEMPT) {
            if (previousStep.attempt.getTaskAttemptId().equals(currentAttempt.getTaskAttemptId())) {
              // found loop.
              // this should only happen for read errors in currentAttempt
              List<DataDependencyEvent> dataEvents = currentAttempt.getLastDataEvents();
              Preconditions.checkState(dataEvents.size() > 1); // received
              // original and
              // retry data events
              Preconditions.checkState(
                  currentStepLastDataEventTime
                      < dataEvents.get(dataEvents.size() - 1).getTimestamp()); // new event is
              // earlier than
              // last
            }
          }
        }

        tempCP.add(currentStep);

        // find the next attempt on the critical path
        boolean dataDependency = false;
        // find out predecessor dependency
        if (currentStepLastDataEventTime > currentAttempt.getCreationTime()) {
          dataDependency = true;
        }

        long startCriticalPathTime = 0;
        String nextAttemptId = null;
        CriticalPathDependency reason = null;
        if (dataDependency) {
          // last data event was produced after the attempt was scheduled. use
          // data dependency
          // typically the case when scheduling ahead of time
          System.out.println("Has data dependency");
          if (!Strings.isNullOrEmpty(currentStepLastDataTA)) {
            // there is a valid data causal TA. Use it.
            nextAttemptId = currentStepLastDataTA;
            reason = CriticalPathDependency.DATA_DEPENDENCY;
            startCriticalPathTime = currentStepLastDataEventTime;
            System.out.println("Using data dependency " + nextAttemptId);
          } else {
            // there is no valid data causal TA. This means data event came from the same vertex
            VertexInfo vertex = currentAttempt.getTaskInfo().getVertexInfo();
            Preconditions.checkState(
                !vertex.getAdditionalInputInfoList().isEmpty(),
                "Vertex: "
                    + vertex.getVertexId()
                    + " has no external inputs but the last data event "
                    + "TA is null for "
                    + currentAttempt.getTaskAttemptId());
            nextAttemptId = null;
            reason = CriticalPathDependency.INIT_DEPENDENCY;
            System.out.println("Using init dependency");
          }
        } else {
          // attempt was scheduled after last data event. use scheduling dependency
          // typically happens for retries
          System.out.println("Has scheduling dependency");
          if (!Strings.isNullOrEmpty(currentAttempt.getCreationCausalTA())) {
            // there is a scheduling causal TA. Use it.
            nextAttemptId = currentAttempt.getCreationCausalTA();
            reason = CriticalPathDependency.RETRY_DEPENDENCY;
            TaskAttemptInfo nextAttempt = attempts.get(nextAttemptId);
            if (nextAttemptId != null) {
              VertexInfo currentVertex = currentAttempt.getTaskInfo().getVertexInfo();
              VertexInfo nextVertex = nextAttempt.getTaskInfo().getVertexInfo();
              if (!nextVertex.getVertexName().equals(currentVertex.getVertexName())) {
                // cause from different vertex. Might be rerun to re-generate outputs
                for (VertexInfo outVertex : currentVertex.getOutputVertices()) {
                  if (nextVertex.getVertexName().equals(outVertex.getVertexName())) {
                    // next vertex is an output vertex
                    reason = CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY;
                    break;
                  }
                }
              }
            }
            if (reason == CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY) {
              // rescheduled due to read error. start critical at read error report time.
              // for now proxy own creation time for read error report time
              startCriticalPathTime = currentAttempt.getCreationTime();
            } else {
              // rescheduled due to own previous attempt failure
              // we are critical when the previous attempt fails
              Preconditions.checkState(nextAttempt != null);
              Preconditions.checkState(
                  nextAttempt
                      .getTaskInfo()
                      .getTaskId()
                      .equals(currentAttempt.getTaskInfo().getTaskId()));
              startCriticalPathTime = nextAttempt.getFinishTime();
            }
            System.out.println("Using scheduling dependency " + nextAttemptId);
          } else {
            // there is no scheduling causal TA.
            if (!Strings.isNullOrEmpty(currentStepLastDataTA)) {
              // there is a data event going to the vertex. Count the time between data event and
              // creation time as Initializer/Manager overhead and follow data dependency
              nextAttemptId = currentStepLastDataTA;
              reason = CriticalPathDependency.DATA_DEPENDENCY;
              startCriticalPathTime = currentStepLastDataEventTime;
              long overhead = currentAttempt.getCreationTime() - currentStepLastDataEventTime;
              currentStep.notes.add(
                  "Initializer/VertexManager scheduling overhead " + SVGUtils.getTimeStr(overhead));
              System.out.println("Using data dependency " + nextAttemptId);
            } else {
              // there is no scheduling causal TA and no data event casual TA.
              // the vertex has external input that sent the last data events
              // or the vertex has external input but does not use events
              // or the vertex has no external inputs or edges
              nextAttemptId = null;
              reason = CriticalPathDependency.INIT_DEPENDENCY;
              System.out.println("Using init dependency");
            }
          }
        }

        currentStep.startCriticalPathTime = startCriticalPathTime;
        currentStep.reason = reason;

        Preconditions.checkState(
            currentStep.stopCriticalPathTime >= currentStep.startCriticalPathTime);

        if (Strings.isNullOrEmpty(nextAttemptId)) {
          Preconditions.checkState(reason.equals(CriticalPathDependency.INIT_DEPENDENCY));
          Preconditions.checkState(startCriticalPathTime == 0);
          // no predecessor attempt found. this is the last step in the critical path
          // assume attempts start critical path time is when its scheduled. before that is
          // vertex initialization time
          currentStep.startCriticalPathTime = currentStep.attempt.getCreationTime();

          // add vertex init step
          long initStepStopCriticalTime = currentStep.startCriticalPathTime;
          currentStep = new CriticalPathStep(currentAttempt, EntityType.VERTEX_INIT);
          currentStep.stopCriticalPathTime = initStepStopCriticalTime;
          currentStep.startCriticalPathTime = dagInfo.getStartTime();
          currentStep.reason = CriticalPathDependency.INIT_DEPENDENCY;
          tempCP.add(currentStep);

          if (!tempCP.isEmpty()) {
            for (int i = tempCP.size() - 1; i >= 0; --i) {
              criticalPath.add(tempCP.get(i));
            }
          }
          return;
        }

        currentAttempt = attempts.get(nextAttemptId);
        currentAttemptStopCriticalPathTime = startCriticalPathTime;
      }
    }
  }