Example #1
0
 /**
  * Sets or extends the entry stored under {@code variable} in {@code environment}.
  *
  * <p>If the variable has no value yet, {@code value} is stored as-is. Otherwise the existing
  * value, {@code classPathSeparator} and {@code value} are concatenated in that order. Both the
  * key and the resulting value go through {@code StringInterner.weakIntern} before being stored.
  *
  * @param environment map of variable names to values; updated in place
  * @param variable name of the variable to set or extend
  * @param value value to store, or to append to the existing value
  * @param classPathSeparator separator placed between the existing value and {@code value}
  */
 public static void addToEnvironment(
     Map<String, String> environment, String variable, String value, String classPathSeparator) {
   final String existing = environment.get(variable);
   final String combined = (existing == null) ? value : existing + classPathSeparator + value;
   environment.put(StringInterner.weakIntern(variable), StringInterner.weakIntern(combined));
 }
  /**
   * Sets or extends {@code variable} in {@code env}, joining an existing value and {@code value}
   * with {@code ApplicationConstants.CLASS_PATH_SEPARATOR}.
   *
   * @param env map of variable names to values; updated in place
   * @param variable name of the variable to set or extend
   * @param value value to store, or to append to the existing value
   */
  void addToEnv(Map<String, String> env, String variable, String value) {
    final String separator = ApplicationConstants.CLASS_PATH_SEPARATOR;
    final String previous = env.get(variable);
    String updated = value;
    if (previous != null) {
      updated = previous + separator + value;
    }
    // key and value are interned before they are stored in the map
    env.put(StringInterner.weakIntern(variable), StringInterner.weakIntern(updated));
  }
Example #3
0
 /**
  * Copies every group and counter of {@code counters} into a new {@link Counters} instance.
  *
  * <p>Group and counter names and display names are passed through
  * {@code StringInterner.weakIntern} before being handed to the new instance.
  *
  * @param counters source counters; its {@code groups} and each group's {@code counts} are read
  * @return the newly built {@link Counters}
  */
 static Counters fromAvro(JhCounters counters) {
   final Counters converted = new Counters();
   for (JhCounterGroup sourceGroup : counters.groups) {
     final String groupName = StringInterner.weakIntern(sourceGroup.name.toString());
     final String groupDisplayName = StringInterner.weakIntern(sourceGroup.displayName.toString());
     final CounterGroup targetGroup = converted.addGroup(groupName, groupDisplayName);
     for (JhCounter sourceCounter : sourceGroup.counts) {
       final String counterName = StringInterner.weakIntern(sourceCounter.name.toString());
       final String counterDisplayName =
           StringInterner.weakIntern(sourceCounter.displayName.toString());
       targetGroup.addCounter(counterName, counterDisplayName, sourceCounter.value);
     }
   }
   return converted;
 }
Example #4
0
 /**
  * Restores this object's state from {@code in}.
  *
  * <p>Fields are read in this order: queue name, queue state, scheduling info, a
  * count-prefixed sequence of {@link JobStatus} entries, then a count-prefixed sequence of child
  * {@link QueueInfo} entries. Existing children are discarded before the new ones are added.
  *
  * @param in stream to read from
  * @throws IOException if reading from {@code in} fails
  */
 @Override
 public void readFields(DataInput in) throws IOException {
   queueName = StringInterner.weakIntern(Text.readString(in));
   queueState = WritableUtils.readEnum(in, QueueState.class);
   schedulingInfo = StringInterner.weakIntern(Text.readString(in));

   // job statuses: each slot is filled before its content is read
   final int jobCount = in.readInt();
   stats = new JobStatus[jobCount];
   for (int job = 0; job < jobCount; job++) {
     final JobStatus jobStatus = new JobStatus();
     stats[job] = jobStatus;
     jobStatus.readFields(in);
   }

   // child queues replace whatever was held before
   final int childCount = in.readInt();
   children.clear();
   for (int child = 0; child < childCount; child++) {
     final QueueInfo childQueue = new QueueInfo();
     childQueue.readFields(in);
     children.add(childQueue);
   }
 }
Example #5
0
  /**
   * Builds a task attempt description from its JSON representation.
   *
   * <p>The entity id is read from {@code jsonObject}; timing, container, node, status and
   * diagnostics values are read from the {@code Constants.OTHER_INFO} child object. Identifier-like
   * strings are interned through {@code StringInterner.weakIntern}.
   *
   * @param jsonObject JSON entity whose type must equal {@code Constants.TEZ_TASK_ATTEMPT_ID}
   *     (case-insensitive)
   * @throws JSONException if the entity type or the other-info object cannot be read
   * @throws IllegalArgumentException if the entity type is not a task attempt
   * @throws IllegalStateException if the last data events are not in strictly increasing
   *     timestamp order
   */
  TaskAttemptInfo(JSONObject jsonObject) throws JSONException {
    super(jsonObject);

    Preconditions.checkArgument(
        jsonObject
            .getString(Constants.ENTITY_TYPE)
            .equalsIgnoreCase(Constants.TEZ_TASK_ATTEMPT_ID));

    taskAttemptId = StringInterner.weakIntern(jsonObject.optString(Constants.ENTITY));

    // Parse additional Info
    final JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO);
    startTime = otherInfoNode.optLong(Constants.START_TIME);
    endTime = otherInfoNode.optLong(Constants.FINISH_TIME);
    diagnostics = otherInfoNode.optString(Constants.DIAGNOSTICS);
    creationTime = otherInfoNode.optLong(Constants.CREATION_TIME);
    creationCausalTA =
        StringInterner.weakIntern(otherInfoNode.optString(Constants.CREATION_CAUSAL_ATTEMPT));
    allocationTime = otherInfoNode.optLong(Constants.ALLOCATION_TIME);
    containerId = StringInterner.weakIntern(otherInfoNode.optString(Constants.CONTAINER_ID));

    // Keep only the part of the node id before the first ':'. indexOf/substring is used rather
    // than split(":")[0] because split drops trailing empty strings, so an id consisting only of
    // ':' characters yields an empty array and [0] throws ArrayIndexOutOfBoundsException.
    String id = otherInfoNode.optString(Constants.NODE_ID);
    String nodeHost = "";
    if (id != null) {
      int separatorIndex = id.indexOf(':');
      nodeHost = (separatorIndex >= 0) ? id.substring(0, separatorIndex) : id;
    }
    nodeId = StringInterner.weakIntern(nodeHost);
    logUrl = otherInfoNode.optString(Constants.COMPLETED_LOGS_URL);

    status = StringInterner.weakIntern(otherInfoNode.optString(Constants.STATUS));
    container = new Container(containerId, nodeId);
    if (otherInfoNode.has(Constants.LAST_DATA_EVENTS)) {
      List<DataDependencyEvent> eventInfo =
          Utils.parseDataEventDependencyFromJSON(
              otherInfoNode.optJSONObject(Constants.LAST_DATA_EVENTS));
      long lastTime = 0;
      for (DataDependencyEvent item : eventInfo) {
        // check these are in time order
        Preconditions.checkState(lastTime < item.getTimestamp());
        lastTime = item.getTimestamp();
        lastDataEvents.add(item);
      }
    }
    terminationCause =
        StringInterner.weakIntern(otherInfoNode.optString(ATSConstants.TASK_ATTEMPT_ERROR_ENUM));
    // a finish time that is not after the start time counts as zero execution time
    executionTimeInterval = (endTime > startTime) ? (endTime - startTime) : 0;
  }
Example #6
0
/**
 * Description of a single task attempt, built from its JSON representation.
 *
 * <p>Holds the attempt's timing values, container and node identifiers, status, diagnostics and
 * the data events it received last, and offers derived intervals on top of them. Intervals that
 * are relative to the DAG start require the owning {@link TaskInfo} to have been set through
 * {@link #setTaskInfo(TaskInfo)}.
 */
@Public
@Evolving
public class TaskAttemptInfo extends BaseInfo {

  private static final String SUCCEEDED =
      StringInterner.weakIntern(TaskAttemptState.SUCCEEDED.name());

  private final String taskAttemptId;
  private final long startTime;
  private final long endTime;
  private final String diagnostics;

  private final long creationTime;
  private final long allocationTime;
  private final String containerId;
  private final String nodeId;
  private final String status;
  private final String logUrl;
  private final String creationCausalTA;
  private final String terminationCause;
  private final long executionTimeInterval;
  // this list is in time order - array list for easy walking
  private final ArrayList<DataDependencyEvent> lastDataEvents = Lists.newArrayList();

  private TaskInfo taskInfo;

  private Container container;

  /** A data event received by an attempt: the producing attempt id and the event time. */
  public static class DataDependencyEvent {
    String taId;
    long timestamp;

    public DataDependencyEvent(String id, long time) {
      taId = id;
      timestamp = time;
    }

    public long getTimestamp() {
      return timestamp;
    }

    public String getTaskAttemptId() {
      return taId;
    }
  }

  /**
   * Builds a task attempt description from its JSON representation.
   *
   * <p>The entity id is read from {@code jsonObject}; timing, container, node, status and
   * diagnostics values are read from the {@code Constants.OTHER_INFO} child object. Identifier-like
   * strings are interned through {@code StringInterner.weakIntern}.
   *
   * @param jsonObject JSON entity whose type must equal {@code Constants.TEZ_TASK_ATTEMPT_ID}
   *     (case-insensitive)
   * @throws JSONException if the entity type or the other-info object cannot be read
   * @throws IllegalArgumentException if the entity type is not a task attempt
   * @throws IllegalStateException if the last data events are not in strictly increasing
   *     timestamp order
   */
  TaskAttemptInfo(JSONObject jsonObject) throws JSONException {
    super(jsonObject);

    Preconditions.checkArgument(
        jsonObject
            .getString(Constants.ENTITY_TYPE)
            .equalsIgnoreCase(Constants.TEZ_TASK_ATTEMPT_ID));

    taskAttemptId = StringInterner.weakIntern(jsonObject.optString(Constants.ENTITY));

    // Parse additional Info
    final JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO);
    startTime = otherInfoNode.optLong(Constants.START_TIME);
    endTime = otherInfoNode.optLong(Constants.FINISH_TIME);
    diagnostics = otherInfoNode.optString(Constants.DIAGNOSTICS);
    creationTime = otherInfoNode.optLong(Constants.CREATION_TIME);
    creationCausalTA =
        StringInterner.weakIntern(otherInfoNode.optString(Constants.CREATION_CAUSAL_ATTEMPT));
    allocationTime = otherInfoNode.optLong(Constants.ALLOCATION_TIME);
    containerId = StringInterner.weakIntern(otherInfoNode.optString(Constants.CONTAINER_ID));

    // Keep only the part of the node id before the first ':'. indexOf/substring is used rather
    // than split(":")[0] because split drops trailing empty strings, so an id consisting only of
    // ':' characters yields an empty array and [0] throws ArrayIndexOutOfBoundsException.
    String id = otherInfoNode.optString(Constants.NODE_ID);
    String nodeHost = "";
    if (id != null) {
      int separatorIndex = id.indexOf(':');
      nodeHost = (separatorIndex >= 0) ? id.substring(0, separatorIndex) : id;
    }
    nodeId = StringInterner.weakIntern(nodeHost);
    logUrl = otherInfoNode.optString(Constants.COMPLETED_LOGS_URL);

    status = StringInterner.weakIntern(otherInfoNode.optString(Constants.STATUS));
    container = new Container(containerId, nodeId);
    if (otherInfoNode.has(Constants.LAST_DATA_EVENTS)) {
      List<DataDependencyEvent> eventInfo =
          Utils.parseDataEventDependencyFromJSON(
              otherInfoNode.optJSONObject(Constants.LAST_DATA_EVENTS));
      long lastTime = 0;
      for (DataDependencyEvent item : eventInfo) {
        // check these are in time order
        Preconditions.checkState(lastTime < item.getTimestamp());
        lastTime = item.getTimestamp();
        lastDataEvents.add(item);
      }
    }
    terminationCause =
        StringInterner.weakIntern(otherInfoNode.optString(ATSConstants.TASK_ATTEMPT_ERROR_ENUM));
    // a finish time that is not after the start time counts as zero execution time
    executionTimeInterval = (endTime > startTime) ? (endTime - startTime) : 0;
  }

  /** Returns an ordering that sorts attempts by ascending allocation time. */
  public static Ordering<TaskAttemptInfo> orderingOnAllocationTime() {
    return Ordering.from(
        new Comparator<TaskAttemptInfo>() {
          @Override
          public int compare(TaskAttemptInfo o1, TaskAttemptInfo o2) {
            long first = o1.getAllocationTime();
            long second = o2.getAllocationTime();
            return (first < second) ? -1 : ((first > second) ? 1 : 0);
          }
        });
  }

  /**
   * Links this attempt to its task and registers the attempt with that task.
   *
   * @param taskInfo owning task; must not be null
   * @throws IllegalArgumentException if {@code taskInfo} is null
   */
  void setTaskInfo(TaskInfo taskInfo) {
    Preconditions.checkArgument(taskInfo != null, "Provide valid taskInfo");
    this.taskInfo = taskInfo;
    taskInfo.addTaskAttemptInfo(this);
  }

  /** Returns the start time relative to the start time of the owning DAG. */
  @Override
  public final long getStartTimeInterval() {
    return startTime - (getTaskInfo().getVertexInfo().getDagInfo().getStartTime());
  }

  /** Returns the finish time relative to the start time of the owning DAG. */
  @Override
  public final long getFinishTimeInterval() {
    return endTime - (getTaskInfo().getVertexInfo().getDagInfo().getStartTime());
  }

  /** Returns whether the status equals the name of {@code TaskAttemptState.SUCCEEDED}. */
  public final boolean isSucceeded() {
    return status.equals(SUCCEEDED);
  }

  /** Returns the internal, time-ordered list of last data events (not a copy). */
  public final List<DataDependencyEvent> getLastDataEvents() {
    return lastDataEvents;
  }

  /** Returns finish time minus start time, or 0 when the finish time is not after the start. */
  public final long getExecutionTimeInterval() {
    return executionTimeInterval;
  }

  /**
   * Returns the time between the later of (start time, last data event time) and the finish time.
   *
   * @return the interval, or -1 when the start or finish time is not positive
   */
  public final long getPostDataExecutionTimeInterval() {
    if (getStartTime() > 0 && getFinishTime() > 0) {
      // start time defaults to the actual start time
      long postDataStartTime = startTime;
      if (!lastDataEvents.isEmpty()) {
        // if last data event is after the start time then use last data event time
        long lastEventTime = lastDataEvents.get(lastDataEvents.size() - 1).getTimestamp();
        postDataStartTime = Math.max(startTime, lastEventTime);
      }
      return (getFinishTime() - postDataStartTime);
    }
    return -1;
  }

  public final long getAllocationToEndTimeInterval() {
    return (endTime - allocationTime);
  }

  public final long getAllocationToStartTimeInterval() {
    return (startTime - allocationTime);
  }

  public final long getCreationToAllocationTimeInterval() {
    return (allocationTime - creationTime);
  }

  public final long getStartTime() {
    return startTime;
  }

  public final long getFinishTime() {
    return endTime;
  }

  public final long getCreationTime() {
    return creationTime;
  }

  /**
   * Returns the most recent data event whose timestamp is strictly before {@code timeThreshold}.
   *
   * @param timeThreshold exclusive upper bound for the event timestamp
   * @return the matching event, or {@code null} when no event precedes the threshold
   */
  public final DataDependencyEvent getLastDataEventInfo(long timeThreshold) {
    for (int i = lastDataEvents.size() - 1; i >= 0; i--) {
      // walk back in time until we get first event that happened before the threshold
      DataDependencyEvent item = lastDataEvents.get(i);
      if (item.getTimestamp() < timeThreshold) {
        return item;
      }
    }
    return null;
  }

  public final long getTimeTaken() {
    return getFinishTimeInterval() - getStartTimeInterval();
  }

  /** Returns the creation time relative to the start time of the owning DAG. */
  public final long getCreationTimeInterval() {
    return creationTime - (getTaskInfo().getVertexInfo().getDagInfo().getStartTime());
  }

  public final String getCreationCausalTA() {
    return creationCausalTA;
  }

  public final long getAllocationTime() {
    return allocationTime;
  }

  /**
   * Returns the vertex name followed by {@code " : "} and the part of the attempt id that follows
   * its second-to-last underscore.
   */
  public final String getShortName() {
    return getTaskInfo().getVertexInfo().getVertexName()
        + " : "
        + taskAttemptId.substring(
            taskAttemptId.lastIndexOf('_', taskAttemptId.lastIndexOf('_') - 1) + 1);
  }

  @Override
  public final String getDiagnostics() {
    return diagnostics;
  }

  public final String getTerminationCause() {
    return terminationCause;
  }

  /**
   * Static factory equivalent to calling the constructor.
   *
   * @param taskInfoObject JSON entity describing the attempt
   * @return the new attempt description
   * @throws JSONException if required JSON fields cannot be read
   */
  public static TaskAttemptInfo create(JSONObject taskInfoObject) throws JSONException {
    return new TaskAttemptInfo(taskInfoObject);
  }

  /** Returns whether any of the data-local, rack-local or other-local task counters exists. */
  public final boolean isLocalityInfoAvailable() {
    Map<String, TezCounter> dataLocalTask =
        getCounter(DAGCounter.class.getName(), DAGCounter.DATA_LOCAL_TASKS.toString());
    Map<String, TezCounter> rackLocalTask =
        getCounter(DAGCounter.class.getName(), DAGCounter.RACK_LOCAL_TASKS.toString());
    Map<String, TezCounter> otherLocalTask =
        getCounter(DAGCounter.class.getName(), DAGCounter.OTHER_LOCAL_TASKS.toString());

    return !dataLocalTask.isEmpty() || !rackLocalTask.isEmpty() || !otherLocalTask.isEmpty();
  }

  /** Returns the status, followed by {@code ":"} and the termination cause when one is set. */
  public final String getDetailedStatus() {
    if (!Strings.isNullOrEmpty(getTerminationCause())) {
      return getStatus() + ":" + getTerminationCause();
    }
    return getStatus();
  }

  /**
   * Returns the first available locality counter, checked in the order data-local, rack-local,
   * other-local.
   *
   * @return the counter, or {@code null} when none of the three is present
   */
  public final TezCounter getLocalityInfo() {
    Map<String, TezCounter> dataLocalTask =
        getCounter(DAGCounter.class.getName(), DAGCounter.DATA_LOCAL_TASKS.toString());
    Map<String, TezCounter> rackLocalTask =
        getCounter(DAGCounter.class.getName(), DAGCounter.RACK_LOCAL_TASKS.toString());
    Map<String, TezCounter> otherLocalTask =
        getCounter(DAGCounter.class.getName(), DAGCounter.OTHER_LOCAL_TASKS.toString());

    if (!dataLocalTask.isEmpty()) {
      return dataLocalTask.get(DAGCounter.class.getName());
    }

    if (!rackLocalTask.isEmpty()) {
      return rackLocalTask.get(DAGCounter.class.getName());
    }

    if (!otherLocalTask.isEmpty()) {
      return otherLocalTask.get(DAGCounter.class.getName());
    }
    return null;
  }

  public final TaskInfo getTaskInfo() {
    return taskInfo;
  }

  public final String getTaskAttemptId() {
    return taskAttemptId;
  }

  public final String getNodeId() {
    return nodeId;
  }

  public final String getStatus() {
    return status;
  }

  public final Container getContainer() {
    return container;
  }

  public final String getLogURL() {
    return logUrl;
  }

  /**
   * Get merge counter per source. Available in case of reducer task
   *
   * @return {@code Map<String, TezCounter>} merge phase time at every counter group level
   */
  public final Map<String, TezCounter> getMergePhaseTime() {
    return getCounter(null, TaskCounter.MERGE_PHASE_TIME.name());
  }

  /**
   * Get shuffle counter per source. Available in case of shuffle
   *
   * @return {@code Map<String, TezCounter>} shuffle phase time at every counter group level
   */
  public final Map<String, TezCounter> getShufflePhaseTime() {
    return getCounter(null, TaskCounter.SHUFFLE_PHASE_TIME.name());
  }

  /**
   * Get OUTPUT_BYTES counter per source. Available in case of map outputs
   *
   * @return {@code Map<String, TezCounter>} output bytes counter at every counter group
   */
  public final Map<String, TezCounter> getTaskOutputBytes() {
    return getCounter(null, TaskCounter.OUTPUT_BYTES.name());
  }

  /**
   * Get number of spills per source. (SPILLED_RECORDS / OUTPUT_RECORDS)
   *
   * <p>Sources that have a SPILLED_RECORDS counter but no OUTPUT_RECORDS counter are left out of
   * the result, because the ratio is undefined for them.
   *
   * @return {@code Map<String, Float>} spill ratio keyed by source
   */
  public final Map<String, Float> getSpillCount() {
    Map<String, TezCounter> outputRecords = getCounter(null, "OUTPUT_RECORDS");
    Map<String, TezCounter> spilledRecords = getCounter(null, "SPILLED_RECORDS");
    Map<String, Float> result = Maps.newHashMap();
    for (Map.Entry<String, TezCounter> entry : spilledRecords.entrySet()) {
      String source = entry.getKey();
      TezCounter outputCounter = outputRecords.get(source);
      if (outputCounter == null) {
        // previously dereferenced unconditionally, which threw NullPointerException
        continue;
      }
      long spilledVal = entry.getValue().getValue();
      long outputVal = outputCounter.getValue();
      result.put(source, (spilledVal * 1.0f) / (outputVal * 1.0f));
    }
    return result;
  }

  /**
   * Returns a bracketed, comma-separated summary of this attempt. The DAG-relative times in the
   * summary are computed through the owning task, so the task must have been set.
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("[");
    sb.append("taskAttemptId=").append(getTaskAttemptId()).append(", ");
    sb.append("creationTime=").append(getCreationTimeInterval()).append(", ");
    sb.append("startTime=").append(getStartTimeInterval()).append(", ");
    sb.append("finishTime=").append(getFinishTimeInterval()).append(", ");
    sb.append("timeTaken=").append(getTimeTaken()).append(", ");
    sb.append("events=").append(getEvents()).append(", ");
    sb.append("diagnostics=").append(getDiagnostics()).append(", ");
    sb.append("container=").append(getContainer()).append(", ");
    sb.append("nodeId=").append(getNodeId()).append(", ");
    sb.append("logURL=").append(getLogURL()).append(", ");
    sb.append("status=").append(getStatus());
    sb.append("]");
    return sb.toString();
  }
}
 /**
  * Restores this object's state from {@code in}: first the queue name, which is interned, then
  * the operations string array.
  *
  * @param in stream to read from
  * @throws IOException if reading from {@code in} fails
  */
 @Override
 public void readFields(DataInput in) throws IOException {
   final String rawQueueName = Text.readString(in);
   queueName = StringInterner.weakIntern(rawQueueName);
   operations = WritableUtils.readStringArray(in);
 }
public class CriticalPathAnalyzer extends TezAnalyzerBase implements Analyzer {

  String succeededState = StringInterner.weakIntern(TaskAttemptState.SUCCEEDED.name());
  String failedState = StringInterner.weakIntern(TaskAttemptState.FAILED.name());

  /** Reason that links a critical path step to the step preceding it on the path. */
  public enum CriticalPathDependency {
    // the predecessor is the attempt that produced the last data event seen by this step
    DATA_DEPENDENCY,
    // no predecessor attempt was found; also used for the vertex initialization step
    INIT_DEPENDENCY,
    // used for the DAG commit step that closes the critical path
    COMMIT_DEPENDENCY,
    // the predecessor is the attempt recorded as the creation cause of this attempt
    RETRY_DEPENDENCY,
    // the creation cause belongs to an output vertex of this attempt's vertex
    OUTPUT_RECREATE_DEPENDENCY
  }

  public static final String DRAW_SVG = "tez.critical-path-analyzer.draw-svg";

  /**
   * One segment of the critical path: the entity it refers to, why it follows the previous
   * segment, the time window it spends on the path, and free-form notes attached by the analysis.
   */
  public static class CriticalPathStep {
    /** Kind of entity a step stands for. */
    public enum EntityType {
      ATTEMPT,
      VERTEX_INIT,
      DAG_COMMIT
    }

    EntityType type;
    TaskAttemptInfo attempt;
    // reason linking this to the previous step on the critical path
    CriticalPathDependency reason;
    // time at which attempt is on critical path
    long startCriticalPathTime;
    // time at which attempt is off critical path
    long stopCriticalPathTime;
    List<String> notes = Lists.newLinkedList();

    /**
     * @param attempt attempt this step refers to
     * @param type kind of entity the step stands for
     */
    public CriticalPathStep(TaskAttemptInfo attempt, EntityType type) {
      this.attempt = attempt;
      this.type = type;
    }

    public EntityType getType() {
      return this.type;
    }

    public TaskAttemptInfo getAttempt() {
      return this.attempt;
    }

    public long getStartCriticalTime() {
      return this.startCriticalPathTime;
    }

    public long getStopCriticalTime() {
      return this.stopCriticalPathTime;
    }

    public CriticalPathDependency getReason() {
      return this.reason;
    }

    public List<String> getNotes() {
      return this.notes;
    }
  }

  List<CriticalPathStep> criticalPath = Lists.newLinkedList();

  Map<String, TaskAttemptInfo> attempts = Maps.newHashMap();

  /** Creates an analyzer; the critical path and the attempt index start out empty. */
  public CriticalPathAnalyzer() {}

  /**
   * Indexes every attempt of the DAG, picks the succeeded or failed attempt with the latest
   * finish time, builds the critical path ending at that attempt, annotates it, and optionally
   * writes it out as SVG (controlled by {@link #DRAW_SVG}, default {@code true}).
   *
   * <p>Killed attempts are ignored when picking the last attempt so that kills issued at DAG
   * completion do not count. If no attempt qualifies, a message is printed and nothing else
   * happens.
   *
   * @param dagInfo DAG to analyze
   * @throws TezException declared by the overridden method
   */
  @Override
  public void analyze(DagInfo dagInfo) throws TezException {
    TaskAttemptInfo latestAttempt = null;
    long latestFinishTime = 0;
    for (VertexInfo vertex : dagInfo.getVertices()) {
      for (TaskInfo task : vertex.getTasks()) {
        for (TaskAttemptInfo candidate : task.getTaskAttempts()) {
          attempts.put(candidate.getTaskAttemptId(), candidate);
          final boolean finished =
              candidate.getStatus().equals(succeededState)
                  || candidate.getStatus().equals(failedState);
          if (finished && latestFinishTime < candidate.getFinishTime()) {
            latestAttempt = candidate;
            latestFinishTime = candidate.getFinishTime();
          }
        }
      }
    }

    if (latestAttempt == null) {
      System.out.println("Cannot find last attempt to finish in DAG " + dagInfo.getDagId());
      return;
    }

    createCriticalPath(dagInfo, latestAttempt, latestFinishTime, attempts);
    analyzeCriticalPath(dagInfo);

    final boolean drawSvg = getConf().getBoolean(DRAW_SVG, true);
    if (drawSvg) {
      saveCriticalPathAsSVG(dagInfo);
    }
  }

  /** Returns the internal list of critical path steps (not a copy). */
  public List<CriticalPathStep> getCriticalPath() {
    return this.criticalPath;
  }

  /**
   * Writes the critical path to {@code <output dir>/<dag id>.svg} and prints the target file name.
   *
   * @param dagInfo DAG the critical path belongs to; its id names the file
   */
  private void saveCriticalPathAsSVG(DagInfo dagInfo) {
    final SVGUtils svgWriter = new SVGUtils();
    final String svgPath = getOutputDir() + File.separator + dagInfo.getDagId() + ".svg";
    System.out.println("Writing output to: " + svgPath);
    svgWriter.saveCriticalPathAsSVG(dagInfo, svgPath, criticalPath);
  }

  /**
   * Adds notes to attempt steps that were already on the critical path while still waiting for a
   * container.
   *
   * <p>For each such step the note says whether the container was newly allocated, fully busy
   * with earlier attempts, or busy for part of the wait. A further note is added for every
   * internally preempted attempt that finished between the step's creation and allocation time.
   *
   * @param dag DAG whose attempts and container mapping are inspected
   */
  private void analyzeAllocationOverhead(DagInfo dag) {
    List<TaskAttemptInfo> preemptedAttempts = Lists.newArrayList();
    for (VertexInfo v : dag.getVertices()) {
      for (TaskInfo t : v.getTasks()) {
        for (TaskAttemptInfo a : t.getTaskAttempts()) {
          if (a.getTerminationCause()
              .equals(TaskAttemptTerminationCause.INTERNAL_PREEMPTION.name())) {
            System.out.println("Found preempted attempt " + a.getTaskAttemptId());
            preemptedAttempts.add(a);
          }
        }
      }
    }
    // criticalPath is a linked list, so it is iterated rather than indexed with get(i),
    // which would walk the list from its head on every step.
    for (CriticalPathStep step : criticalPath) {
      if (step.getType() != EntityType.ATTEMPT) {
        continue;
      }
      TaskAttemptInfo attempt = step.attempt;

      long creationTime = attempt.getCreationTime();
      long allocationTime = attempt.getAllocationTime();
      if (allocationTime < step.startCriticalPathTime) {
        // allocated before it became critical
        continue;
      }

      // the attempt is critical before allocation. So allocation overhead needs analysis
      Container container = attempt.getContainer();
      if (container == null) {
        continue;
      }

      // named containerAttempts so it does not shadow the attempts field
      Collection<TaskAttemptInfo> containerAttempts = dag.getContainerMapping().get(container);
      if (containerAttempts != null && !containerAttempts.isEmpty()) {
        // arrange attempts by allocation time
        List<TaskAttemptInfo> attemptsList = Lists.newArrayList(containerAttempts);
        Collections.sort(attemptsList, TaskAttemptInfo.orderingOnAllocationTime());
        // walk the list to record allocation time before the current attempt
        long containerPreviousAllocatedTime = 0;
        for (TaskAttemptInfo containerAttempt : attemptsList) {
          if (containerAttempt.getTaskAttemptId().equals(attempt.getTaskAttemptId())) {
            break;
          }
          System.out.println(
              "Container: "
                  + container.getId()
                  + " running att: "
                  + containerAttempt.getTaskAttemptId()
                  + " wait att: "
                  + attempt.getTaskAttemptId());
          containerPreviousAllocatedTime += containerAttempt.getAllocationToEndTimeInterval();
        }
        if (containerPreviousAllocatedTime == 0) {
          step.notes.add("Container " + container.getId() + " newly allocated.");
        } else if (containerPreviousAllocatedTime
            >= attempt.getCreationToAllocationTimeInterval()) {
          step.notes.add("Container " + container.getId() + " was fully allocated");
        } else {
          step.notes.add(
              "Container "
                  + container.getId()
                  + " allocated for "
                  + SVGUtils.getTimeStr(containerPreviousAllocatedTime)
                  + " out of "
                  + SVGUtils.getTimeStr(attempt.getCreationToAllocationTimeInterval())
                  + " of allocation wait time");
        }
      }
      // look for internal preemptions while attempt was waiting for allocation
      for (TaskAttemptInfo a : preemptedAttempts) {
        if (a.getFinishTime() > creationTime && a.getFinishTime() < allocationTime) {
          // found an attempt that was preempted within this time interval
          step.notes.add("Potentially waited for preemption of " + a.getShortName());
        }
      }
    }
  }

  /**
   * Flags attempt steps whose execution time exceeds 1.25 times their vertex's average execution
   * time, and prints the DAG duration next to the summed critical time of all steps.
   *
   * <p>Attempts with more than one last data event are not flagged, since read errors could have
   * delayed them; neither are attempts whose vertex reports a non-positive average.
   *
   * @param dag DAG whose start and finish times are reported
   */
  private void analyzeStragglers(DagInfo dag) {
    long dagStartTime = dag.getStartTime();
    long dagTime = dag.getFinishTime() - dagStartTime;
    long totalAttemptCriticalTime = 0;
    // criticalPath is a linked list, so it is iterated rather than indexed with get(i),
    // which would walk the list from its head on every step.
    for (CriticalPathStep step : criticalPath) {
      totalAttemptCriticalTime += (step.stopCriticalPathTime - step.startCriticalPathTime);
      if (step.getType() != EntityType.ATTEMPT) {
        continue;
      }
      TaskAttemptInfo attempt = step.attempt;
      // analyze execution overhead
      if (attempt.getLastDataEvents().size() > 1) {
        // there were read errors. that could have delayed the attempt. ignore this
        continue;
      }
      long avgExecutionTime = attempt.getTaskInfo().getVertexInfo().getAvgExecutionTimeInterval();
      if (avgExecutionTime <= 0) {
        continue;
      }
      if (avgExecutionTime * 1.25 < attempt.getExecutionTimeInterval()) {
        step.notes.add(
            "Potential straggler. Execution time "
                + SVGUtils.getTimeStr(attempt.getExecutionTimeInterval())
                + " compared to vertex average of "
                + SVGUtils.getTimeStr(avgExecutionTime));
      }
    }
    System.out.println(
        "DAG time taken: "
            + dagTime
            + " TotalAttemptTime: "
            + totalAttemptCriticalTime
            + " DAG finish time: "
            + dag.getFinishTime()
            + " DAG start time: "
            + dagStartTime);
  }

  /**
   * Runs the straggler analysis followed by the allocation overhead analysis; does nothing when
   * the critical path is empty.
   *
   * @param dag DAG the critical path belongs to
   */
  private void analyzeCriticalPath(DagInfo dag) {
    if (criticalPath.isEmpty()) {
      return;
    }
    analyzeStragglers(dag);
    analyzeAllocationOverhead(dag);
  }

  /**
   * Builds the critical path by walking backwards from the last finished attempt and appends the
   * steps to {@code criticalPath} in forward (time) order.
   *
   * <p>The walk starts with a DAG commit step, then repeatedly adds the current attempt and
   * chooses its predecessor: the producer of the last data event when that event arrived after
   * the attempt was created, otherwise the attempt recorded as its creation cause, otherwise the
   * last data event producer again. When no predecessor exists, a vertex initialization step
   * reaching back to the DAG start is added and the walk ends.
   *
   * @param dagInfo DAG being analyzed; supplies the DAG start and finish times
   * @param lastAttempt attempt the path ends with; nothing is built when null
   * @param lastAttemptFinishTime finish time of {@code lastAttempt}
   * @param attempts all attempts of the DAG keyed by attempt id
   * @throws IllegalStateException if one of the consistency checks on the walk fails, for example
   *     when a referenced predecessor attempt is not present in {@code attempts}
   */
  private void createCriticalPath(
      DagInfo dagInfo,
      TaskAttemptInfo lastAttempt,
      long lastAttemptFinishTime,
      Map<String, TaskAttemptInfo> attempts) {
    List<CriticalPathStep> tempCP = Lists.newLinkedList();
    if (lastAttempt != null) {
      TaskAttemptInfo currentAttempt = lastAttempt;
      CriticalPathStep currentStep = new CriticalPathStep(currentAttempt, EntityType.DAG_COMMIT);
      long currentAttemptStopCriticalPathTime = lastAttemptFinishTime;

      // add the commit step
      currentStep.stopCriticalPathTime = dagInfo.getFinishTime();
      currentStep.startCriticalPathTime = currentAttemptStopCriticalPathTime;
      currentStep.reason = CriticalPathDependency.COMMIT_DEPENDENCY;
      tempCP.add(currentStep);

      while (true) {
        Preconditions.checkState(currentAttempt != null);
        Preconditions.checkState(currentAttemptStopCriticalPathTime > 0);
        System.out.println(
            "Step: " + tempCP.size() + " Attempt: " + currentAttempt.getTaskAttemptId());

        currentStep = new CriticalPathStep(currentAttempt, EntityType.ATTEMPT);
        currentStep.stopCriticalPathTime = currentAttemptStopCriticalPathTime;

        // consider the last data event seen immediately preceding the current critical path
        // stop time for this attempt
        long currentStepLastDataEventTime = 0;
        String currentStepLastDataTA = null;
        DataDependencyEvent item =
            currentAttempt.getLastDataEventInfo(currentStep.stopCriticalPathTime);
        if (item != null) {
          currentStepLastDataEventTime = item.getTimestamp();
          currentStepLastDataTA = item.getTaskAttemptId();
        }

        // sanity check
        for (CriticalPathStep previousStep : tempCP) {
          if (previousStep.type == EntityType.ATTEMPT) {
            if (previousStep.attempt.getTaskAttemptId().equals(currentAttempt.getTaskAttemptId())) {
              // found loop.
              // this should only happen for read errors in currentAttempt
              List<DataDependencyEvent> dataEvents = currentAttempt.getLastDataEvents();
              // received original and retry data events
              Preconditions.checkState(dataEvents.size() > 1);
              // new event is earlier than last
              Preconditions.checkState(
                  currentStepLastDataEventTime
                      < dataEvents.get(dataEvents.size() - 1).getTimestamp());
            }
          }
        }

        tempCP.add(currentStep);

        // find the next attempt on the critical path
        boolean dataDependency = false;
        // find out predecessor dependency
        if (currentStepLastDataEventTime > currentAttempt.getCreationTime()) {
          dataDependency = true;
        }

        long startCriticalPathTime = 0;
        String nextAttemptId = null;
        CriticalPathDependency reason = null;
        if (dataDependency) {
          // last data event was produced after the attempt was scheduled. use
          // data dependency
          // typically the case when scheduling ahead of time
          System.out.println("Has data dependency");
          if (!Strings.isNullOrEmpty(currentStepLastDataTA)) {
            // there is a valid data causal TA. Use it.
            nextAttemptId = currentStepLastDataTA;
            reason = CriticalPathDependency.DATA_DEPENDENCY;
            startCriticalPathTime = currentStepLastDataEventTime;
            System.out.println("Using data dependency " + nextAttemptId);
          } else {
            // there is no valid data causal TA. This means data event came from the same vertex
            VertexInfo vertex = currentAttempt.getTaskInfo().getVertexInfo();
            Preconditions.checkState(
                !vertex.getAdditionalInputInfoList().isEmpty(),
                "Vertex: "
                    + vertex.getVertexId()
                    + " has no external inputs but the last data event "
                    + "TA is null for "
                    + currentAttempt.getTaskAttemptId());
            nextAttemptId = null;
            reason = CriticalPathDependency.INIT_DEPENDENCY;
            System.out.println("Using init dependency");
          }
        } else {
          // attempt was scheduled after last data event. use scheduling dependency
          // typically happens for retries
          System.out.println("Has scheduling dependency");
          if (!Strings.isNullOrEmpty(currentAttempt.getCreationCausalTA())) {
            // there is a scheduling causal TA. Use it.
            nextAttemptId = currentAttempt.getCreationCausalTA();
            reason = CriticalPathDependency.RETRY_DEPENDENCY;
            TaskAttemptInfo nextAttempt = attempts.get(nextAttemptId);
            // The looked-up attempt is what may be missing here; the id is known to be
            // non-empty. Checking the id instead let an unknown causal attempt fail with a
            // NullPointerException below rather than with the sanity check further down.
            if (nextAttempt != null) {
              VertexInfo currentVertex = currentAttempt.getTaskInfo().getVertexInfo();
              VertexInfo nextVertex = nextAttempt.getTaskInfo().getVertexInfo();
              if (!nextVertex.getVertexName().equals(currentVertex.getVertexName())) {
                // cause from different vertex. Might be rerun to re-generate outputs
                for (VertexInfo outVertex : currentVertex.getOutputVertices()) {
                  if (nextVertex.getVertexName().equals(outVertex.getVertexName())) {
                    // next vertex is an output vertex
                    reason = CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY;
                    break;
                  }
                }
              }
            }
            if (reason == CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY) {
              // rescheduled due to read error. start critical at read error report time.
              // for now proxy own creation time for read error report time
              startCriticalPathTime = currentAttempt.getCreationTime();
            } else {
              // rescheduled due to own previous attempt failure
              // we are critical when the previous attempt fails
              Preconditions.checkState(nextAttempt != null);
              Preconditions.checkState(
                  nextAttempt
                      .getTaskInfo()
                      .getTaskId()
                      .equals(currentAttempt.getTaskInfo().getTaskId()));
              startCriticalPathTime = nextAttempt.getFinishTime();
            }
            System.out.println("Using scheduling dependency " + nextAttemptId);
          } else {
            // there is no scheduling causal TA.
            if (!Strings.isNullOrEmpty(currentStepLastDataTA)) {
              // there is a data event going to the vertex. Count the time between data event and
              // creation time as Initializer/Manager overhead and follow data dependency
              nextAttemptId = currentStepLastDataTA;
              reason = CriticalPathDependency.DATA_DEPENDENCY;
              startCriticalPathTime = currentStepLastDataEventTime;
              long overhead = currentAttempt.getCreationTime() - currentStepLastDataEventTime;
              currentStep.notes.add(
                  "Initializer/VertexManager scheduling overhead " + SVGUtils.getTimeStr(overhead));
              System.out.println("Using data dependency " + nextAttemptId);
            } else {
              // there is no scheduling causal TA and no data event casual TA.
              // the vertex has external input that sent the last data events
              // or the vertex has external input but does not use events
              // or the vertex has no external inputs or edges
              nextAttemptId = null;
              reason = CriticalPathDependency.INIT_DEPENDENCY;
              System.out.println("Using init dependency");
            }
          }
        }

        currentStep.startCriticalPathTime = startCriticalPathTime;
        currentStep.reason = reason;

        Preconditions.checkState(
            currentStep.stopCriticalPathTime >= currentStep.startCriticalPathTime);

        if (Strings.isNullOrEmpty(nextAttemptId)) {
          Preconditions.checkState(reason.equals(CriticalPathDependency.INIT_DEPENDENCY));
          Preconditions.checkState(startCriticalPathTime == 0);
          // no predecessor attempt found. this is the last step in the critical path
          // assume attempts start critical path time is when its scheduled. before that is
          // vertex initialization time
          currentStep.startCriticalPathTime = currentStep.attempt.getCreationTime();

          // add vertex init step
          long initStepStopCriticalTime = currentStep.startCriticalPathTime;
          currentStep = new CriticalPathStep(currentAttempt, EntityType.VERTEX_INIT);
          currentStep.stopCriticalPathTime = initStepStopCriticalTime;
          currentStep.startCriticalPathTime = dagInfo.getStartTime();
          currentStep.reason = CriticalPathDependency.INIT_DEPENDENCY;
          tempCP.add(currentStep);

          // tempCP was collected from the end of the DAG backwards; publish it in forward
          // order. A reversed view avoids indexed get(i) calls on the linked list.
          criticalPath.addAll(Lists.reverse(tempCP));
          return;
        }

        currentAttempt = attempts.get(nextAttemptId);
        currentAttemptStopCriticalPathTime = startCriticalPathTime;
      }
    }
  }

  /**
   * Renders the contents of {@code criticalPath} as a CSV table, one record per step.
   *
   * <p>The columns are Entity, PathReason, Status, CriticalStartTime, CriticalStopTime and Notes.
   * The entity column holds the task attempt id for {@code ATTEMPT} steps, the vertex name for
   * {@code VERTEX_INIT} steps, and the literal {@code "DAG COMMIT"} for any other step type. The
   * notes of a step are joined with {@code ';'}.
   *
   * @return a {@code CSVResult} holding one record for every step in {@code criticalPath}
   * @throws TezException declared by the overridden method; not thrown directly by this body
   */
  @Override
  public CSVResult getResult() throws TezException {
    CSVResult table =
        new CSVResult(
            new String[] {
              "Entity", "PathReason", "Status", "CriticalStartTime", "CriticalStopTime", "Notes"
            });

    for (CriticalPathStep step : criticalPath) {
      EntityType stepType = step.getType();

      // Pick the label that identifies this step in the first column.
      String entity;
      if (stepType == EntityType.ATTEMPT) {
        entity = step.getAttempt().getTaskAttemptId();
      } else if (stepType == EntityType.VERTEX_INIT) {
        entity = step.attempt.getTaskInfo().getVertexInfo().getVertexName();
      } else {
        entity = "DAG COMMIT";
      }

      table.addRecord(
          new String[] {
            entity,
            step.getReason().name(),
            step.getAttempt().getDetailedStatus(),
            String.valueOf(step.getStartCriticalTime()),
            String.valueOf(step.getStopCriticalTime()),
            Joiner.on(";").join(step.getNotes())
          });
    }
    return table;
  }

  /**
   * Returns the name of this analyzer.
   *
   * @return the constant string {@code "CriticalPathAnalyzer"}
   */
  @Override
  public String getName() {
    return "CriticalPathAnalyzer";
  }

  /**
   * Returns a one-line, human-readable description of this analyzer.
   *
   * @return the constant string {@code "Analyze critical path of the DAG"}
   */
  @Override
  public String getDescription() {
    return "Analyze critical path of the DAG";
  }

  /**
   * Returns the configuration associated with this analyzer.
   *
   * @return whatever {@code getConf()} currently returns
   */
  @Override
  public Configuration getConfiguration() {
    return getConf();
  }

  /**
   * Command-line entry point. Runs a new {@code CriticalPathAnalyzer} through {@code ToolRunner}
   * with a freshly created {@code Configuration}, then terminates the JVM using the value
   * returned by the run as the exit status.
   *
   * @param args command-line arguments, forwarded unchanged to {@code ToolRunner.run}
   * @throws Exception if {@code ToolRunner.run} throws
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    CriticalPathAnalyzer analyzer = new CriticalPathAnalyzer();
    System.exit(ToolRunner.run(conf, analyzer, args));
  }
}
Example #9
0
  /**
   * Assembles the framework classpath as a single string in which every entry is followed by
   * {@code File.pathSeparator}.
   *
   * <p>Entries are appended in this order:
   *
   * <ol>
   *   <li>the user-specified classpath, when {@code TEZ_USER_CLASSPATH_FIRST} is enabled;
   *   <li>the trimmed {@code TEZ_LIB_URIS_CLASSPATH} entries, unless {@code TEZ_IGNORE_LIB_URIS}
   *       is set or no entries are configured; in that case, and only when {@code usingArchive}
   *       is true, the legacy {@code PWD/<tezlib>/*} and {@code PWD/<tezlib>/lib/*} wildcards;
   *   <li>the trimmed YARN application classpath entries when {@code TEZ_USE_CLUSTER_HADOOP_LIBS}
   *       is enabled, otherwise {@code HADOOP_CONF_DIR};
   *   <li>the user-specified classpath, when it was not already added first.
   * </ol>
   *
   * @param conf configuration from which all classpath-related settings are read
   * @param usingArchive whether the legacy tez archive wildcard entries should be added when the
   *     lib-URIs classpath is not used
   * @return the assembled classpath, passed through {@code StringInterner.weakIntern}
   */
  public static String getFrameworkClasspath(Configuration conf, boolean usingArchive) {
    final String entrySeparator = File.pathSeparator;
    StringBuilder cp = new StringBuilder();

    boolean userFirst =
        conf.getBoolean(
            TezConfiguration.TEZ_USER_CLASSPATH_FIRST,
            TezConfiguration.TEZ_USER_CLASSPATH_FIRST_DEFAULT);
    if (userFirst) {
      addUserSpecifiedClasspath(cp, conf);
    }

    String[] libUriEntries = conf.getStrings(TezConfiguration.TEZ_LIB_URIS_CLASSPATH);
    boolean ignoreLibUris = conf.getBoolean(TezConfiguration.TEZ_IGNORE_LIB_URIS, false);
    boolean useLibUris = !ignoreLibUris && libUriEntries != null && libUriEntries.length != 0;

    if (useLibUris) {
      for (String entry : libUriEntries) {
        cp.append(entry.trim()).append(entrySeparator);
      }
    } else {
      if (ignoreLibUris) {
        LOG.info(
            "Ignoring '"
                + TezConfiguration.TEZ_LIB_URIS
                + "' since  '"
                + TezConfiguration.TEZ_IGNORE_LIB_URIS
                + "' is set to true ");
      }

      // Legacy: Next add the tez libs, if specified via an archive.
      if (usingArchive) {
        // Both legacy entries live under PWD/tezlib/, so build that prefix once.
        String tezLibDir =
            Environment.PWD.$() + File.separator + TezConstants.TEZ_TAR_LR_NAME + File.separator;

        // Add PWD/tezlib/*
        cp.append(tezLibDir).append("*").append(entrySeparator);

        // Legacy: Add PWD/tezlib/lib/*
        cp.append(tezLibDir)
            .append("lib")
            .append(File.separator)
            .append("*")
            .append(entrySeparator);
      }
    }

    boolean useClusterHadoopLibs =
        conf.getBoolean(
            TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS,
            TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS_DEFAULT);
    if (!useClusterHadoopLibs) {
      // Setup HADOOP_CONF_DIR after PWD and tez-libs, if it's required.
      cp.append(Environment.HADOOP_CONF_DIR.$()).append(entrySeparator);
    } else {
      // Last add HADOOP_CLASSPATH, if it's required.
      String[] yarnEntries =
          conf.getStrings(
              YarnConfiguration.YARN_APPLICATION_CLASSPATH,
              YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH);
      for (String entry : yarnEntries) {
        cp.append(entry.trim()).append(entrySeparator);
      }
    }

    if (!userFirst) {
      addUserSpecifiedClasspath(cp, conf);
    }
    return StringInterner.weakIntern(cp.toString());
  }
Example #10
0
 /**
  * Stores {@code value} under {@code key} in {@code env}, overwriting any existing mapping. Both
  * strings are passed through {@code StringInterner.weakIntern} before being stored.
  *
  * @param env environment map to modify
  * @param key variable name to set
  * @param value value to associate with {@code key}
  */
 public static void replaceInEnv(Map<String, String> env, String key, String value) {
   String internedKey = StringInterner.weakIntern(key);
   String internedValue = StringInterner.weakIntern(value);
   env.put(internedKey, internedValue);
 }
Example #11
0
 /**
  * Adds {@code key -> value} to {@code env} only when {@code env} has no mapping for {@code key}.
  * An existing mapping, including one whose value is {@code null}, is left untouched. Stored
  * strings are passed through {@code StringInterner.weakIntern}.
  *
  * @param env environment map to modify
  * @param key variable name to set if missing
  * @param value value to associate with {@code key} when it is missing
  */
 private static void putIfAbsent(Map<String, String> env, String key, String value) {
   if (env.containsKey(key)) {
     // Already defined: keep the existing value.
     return;
   }
   env.put(StringInterner.weakIntern(key), StringInterner.weakIntern(value));
 }