public static void addToEnvironment(
    Map<String, String> environment, String variable, String value, String classPathSeparator) {
  String val = environment.get(variable);
  if (val == null) {
    val = value;
  } else {
    val = val + classPathSeparator + value;
  }
  environment.put(StringInterner.weakIntern(variable), StringInterner.weakIntern(val));
}
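// Added usage sketch (not from the original source): appending two entries to
// a container launch environment. The variable name "CLASSPATH", the ":"
// separator, and the paths are illustrative assumptions.
static void addToEnvironmentExample() {
  Map<String, String> env = new HashMap<>(); // requires java.util.HashMap
  addToEnvironment(env, "CLASSPATH", "/opt/app/lib/app.jar", ":");
  addToEnvironment(env, "CLASSPATH", "/opt/app/conf", ":");
  // env.get("CLASSPATH") -> "/opt/app/lib/app.jar:/opt/app/conf"
}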
void addToEnv(Map<String, String> env, String variable, String value) {
  String classPathSep = ApplicationConstants.CLASS_PATH_SEPARATOR;
  String val = env.get(variable);
  if (val == null) {
    val = value;
  } else {
    val = val + classPathSep + value;
  }
  env.put(StringInterner.weakIntern(variable), StringInterner.weakIntern(val));
}
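// Added context: every snippet in this collection routes strings through
// StringInterner.weakIntern, which deduplicates equal strings while still
// letting unreferenced entries be garbage collected. A minimal sketch of such
// an interner, assuming a Guava-style weak interner (the real Hadoop
// StringInterner may differ in detail):
// requires com.google.common.collect.Interner and
// com.google.common.collect.Interners
public final class WeakStringInternerSketch {
  private static final Interner<String> POOL = Interners.newWeakInterner();

  private WeakStringInternerSketch() {}

  public static String weakIntern(String s) {
    // null-safe wrapper: Guava interners reject null inputs
    return (s == null) ? null : POOL.intern(s);
  }
}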
static Counters fromAvro(JhCounters counters) {
  Counters result = new Counters();
  for (JhCounterGroup g : counters.groups) {
    CounterGroup group =
        result.addGroup(
            StringInterner.weakIntern(g.name.toString()),
            StringInterner.weakIntern(g.displayName.toString()));
    for (JhCounter c : g.counts) {
      group.addCounter(
          StringInterner.weakIntern(c.name.toString()),
          StringInterner.weakIntern(c.displayName.toString()),
          c.value);
    }
  }
  return result;
}
@Override
public void readFields(DataInput in) throws IOException {
  queueName = StringInterner.weakIntern(Text.readString(in));
  queueState = WritableUtils.readEnum(in, QueueState.class);
  schedulingInfo = StringInterner.weakIntern(Text.readString(in));
  int length = in.readInt();
  stats = new JobStatus[length];
  for (int i = 0; i < length; i++) {
    stats[i] = new JobStatus();
    stats[i].readFields(in);
  }
  int count = in.readInt();
  children.clear();
  for (int i = 0; i < count; i++) {
    QueueInfo childQueueInfo = new QueueInfo();
    childQueueInfo.readFields(in);
    children.add(childQueueInfo);
  }
}
@Public
@Evolving
public class TaskAttemptInfo extends BaseInfo {

  private static final String SUCCEEDED =
      StringInterner.weakIntern(TaskAttemptState.SUCCEEDED.name());

  private final String taskAttemptId;
  private final long startTime;
  private final long endTime;
  private final String diagnostics;
  private final long creationTime;
  private final long allocationTime;
  private final String containerId;
  private final String nodeId;
  private final String status;
  private final String logUrl;
  private final String creationCausalTA;
  private final String terminationCause;
  private final long executionTimeInterval;

  // this list is in time order - array list for easy walking
  private final ArrayList<DataDependencyEvent> lastDataEvents = Lists.newArrayList();

  private TaskInfo taskInfo;
  private Container container;

  public static class DataDependencyEvent {
    String taId;
    long timestamp;

    public DataDependencyEvent(String id, long time) {
      taId = id;
      timestamp = time;
    }

    public long getTimestamp() {
      return timestamp;
    }

    public String getTaskAttemptId() {
      return taId;
    }
  }

  TaskAttemptInfo(JSONObject jsonObject) throws JSONException {
    super(jsonObject);
    Preconditions.checkArgument(
        jsonObject
            .getString(Constants.ENTITY_TYPE)
            .equalsIgnoreCase(Constants.TEZ_TASK_ATTEMPT_ID));
    taskAttemptId = StringInterner.weakIntern(jsonObject.optString(Constants.ENTITY));

    // Parse additional info
    final JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO);
    startTime = otherInfoNode.optLong(Constants.START_TIME);
    endTime = otherInfoNode.optLong(Constants.FINISH_TIME);
    diagnostics = otherInfoNode.optString(Constants.DIAGNOSTICS);
    creationTime = otherInfoNode.optLong(Constants.CREATION_TIME);
    creationCausalTA =
        StringInterner.weakIntern(otherInfoNode.optString(Constants.CREATION_CAUSAL_ATTEMPT));
    allocationTime = otherInfoNode.optLong(Constants.ALLOCATION_TIME);
    containerId = StringInterner.weakIntern(otherInfoNode.optString(Constants.CONTAINER_ID));
    String id = otherInfoNode.optString(Constants.NODE_ID);
    nodeId = StringInterner.weakIntern((id != null) ? (id.split(":")[0]) : "");
    logUrl = otherInfoNode.optString(Constants.COMPLETED_LOGS_URL);
    status = StringInterner.weakIntern(otherInfoNode.optString(Constants.STATUS));
    container = new Container(containerId, nodeId);
    if (otherInfoNode.has(Constants.LAST_DATA_EVENTS)) {
      List<DataDependencyEvent> eventInfo =
          Utils.parseDataEventDependencyFromJSON(
              otherInfoNode.optJSONObject(Constants.LAST_DATA_EVENTS));
      long lastTime = 0;
      for (DataDependencyEvent item : eventInfo) {
        // check these are in time order
        Preconditions.checkState(lastTime < item.getTimestamp());
        lastTime = item.getTimestamp();
        lastDataEvents.add(item);
      }
    }
    terminationCause =
        StringInterner.weakIntern(otherInfoNode.optString(ATSConstants.TASK_ATTEMPT_ERROR_ENUM));
    executionTimeInterval = (endTime > startTime) ? (endTime - startTime) : 0;
  }

  public static Ordering<TaskAttemptInfo> orderingOnAllocationTime() {
    return Ordering.from(
        new Comparator<TaskAttemptInfo>() {
          @Override
          public int compare(TaskAttemptInfo o1, TaskAttemptInfo o2) {
            return (o1.getAllocationTime() < o2.getAllocationTime()
                ? -1
                : o1.getAllocationTime() > o2.getAllocationTime() ? 1 : 0);
          }
        });
  }

  void setTaskInfo(TaskInfo taskInfo) {
    Preconditions.checkArgument(taskInfo != null, "Provide valid taskInfo");
    this.taskInfo = taskInfo;
    taskInfo.addTaskAttemptInfo(this);
  }

  @Override
  public final long getStartTimeInterval() {
    return startTime - (getTaskInfo().getVertexInfo().getDagInfo().getStartTime());
  }

  @Override
  public final long getFinishTimeInterval() {
    return endTime - (getTaskInfo().getVertexInfo().getDagInfo().getStartTime());
  }

  public final boolean isSucceeded() {
    return status.equals(SUCCEEDED);
  }

  public final List<DataDependencyEvent> getLastDataEvents() {
    return lastDataEvents;
  }

  public final long getExecutionTimeInterval() {
    return executionTimeInterval;
  }

  public final long getPostDataExecutionTimeInterval() {
    if (getStartTime() > 0 && getFinishTime() > 0) {
      // start time defaults to the actual start time
      long postDataStartTime = startTime;
      if (getLastDataEvents() != null && !getLastDataEvents().isEmpty()) {
        // if the last data event is after the start time then use the last data event time
        long lastEventTime = getLastDataEvents().get(getLastDataEvents().size() - 1).getTimestamp();
        postDataStartTime = startTime > lastEventTime ? startTime : lastEventTime;
      }
      return (getFinishTime() - postDataStartTime);
    }
    return -1;
  }

  public final long getAllocationToEndTimeInterval() {
    return (endTime - allocationTime);
  }

  public final long getAllocationToStartTimeInterval() {
    return (startTime - allocationTime);
  }

  public final long getCreationToAllocationTimeInterval() {
    return (allocationTime - creationTime);
  }

  public final long getStartTime() {
    return startTime;
  }

  public final long getFinishTime() {
    return endTime;
  }

  public final long getCreationTime() {
    return creationTime;
  }

  public final DataDependencyEvent getLastDataEventInfo(long timeThreshold) {
    for (int i = lastDataEvents.size() - 1; i >= 0; i--) {
      // walk back in time until we get the first event that happened before the threshold
      DataDependencyEvent item = lastDataEvents.get(i);
      if (item.getTimestamp() < timeThreshold) {
        return item;
      }
    }
    return null;
  }

  public final long getTimeTaken() {
    return getFinishTimeInterval() - getStartTimeInterval();
  }

  public final long getCreationTimeInterval() {
    return creationTime - (getTaskInfo().getVertexInfo().getDagInfo().getStartTime());
  }

  public final String getCreationCausalTA() {
    return creationCausalTA;
  }

  public final long getAllocationTime() {
    return allocationTime;
  }

  public final String getShortName() {
    return getTaskInfo().getVertexInfo().getVertexName()
        + " : "
        + taskAttemptId.substring(
            taskAttemptId.lastIndexOf('_', taskAttemptId.lastIndexOf('_') - 1) + 1);
  }

  @Override
  public final String getDiagnostics() {
    return diagnostics;
  }

  public final String getTerminationCause() {
    return terminationCause;
  }

  public static TaskAttemptInfo create(JSONObject taskInfoObject) throws JSONException {
    return new TaskAttemptInfo(taskInfoObject);
  }

  public final boolean isLocalityInfoAvailable() {
    Map<String, TezCounter> dataLocalTask =
        getCounter(DAGCounter.class.getName(), DAGCounter.DATA_LOCAL_TASKS.toString());
    Map<String, TezCounter> rackLocalTask =
        getCounter(DAGCounter.class.getName(), DAGCounter.RACK_LOCAL_TASKS.toString());
    Map<String, TezCounter> otherLocalTask =
        getCounter(DAGCounter.class.getName(), DAGCounter.OTHER_LOCAL_TASKS.toString());
    if (!dataLocalTask.isEmpty() || !rackLocalTask.isEmpty() || !otherLocalTask.isEmpty()) {
      return true;
    }
    return false;
  }

  public final String getDetailedStatus() {
    if (!Strings.isNullOrEmpty(getTerminationCause())) {
      return getStatus() + ":" + getTerminationCause();
    }
    return getStatus();
  }

  public final TezCounter getLocalityInfo() {
    Map<String, TezCounter> dataLocalTask =
        getCounter(DAGCounter.class.getName(), DAGCounter.DATA_LOCAL_TASKS.toString());
    Map<String, TezCounter> rackLocalTask =
        getCounter(DAGCounter.class.getName(), DAGCounter.RACK_LOCAL_TASKS.toString());
    Map<String, TezCounter> otherLocalTask =
        getCounter(DAGCounter.class.getName(), DAGCounter.OTHER_LOCAL_TASKS.toString());
    if (!dataLocalTask.isEmpty()) {
      return dataLocalTask.get(DAGCounter.class.getName());
    }
    if (!rackLocalTask.isEmpty()) {
      return rackLocalTask.get(DAGCounter.class.getName());
    }
    if (!otherLocalTask.isEmpty()) {
      return otherLocalTask.get(DAGCounter.class.getName());
    }
    return null;
  }

  public final TaskInfo getTaskInfo() {
    return taskInfo;
  }

  public final String getTaskAttemptId() {
    return taskAttemptId;
  }

  public final String getNodeId() {
    return nodeId;
  }

  public final String getStatus() {
    return status;
  }

  public final Container getContainer() {
    return container;
  }

  public final String getLogURL() {
    return logUrl;
  }

  /**
   * Get merge counter per source. Available in case of reducer tasks.
   *
   * @return Map<String, TezCounter> merge phase time at every counter group level
   */
  public final Map<String, TezCounter> getMergePhaseTime() {
    return getCounter(null, TaskCounter.MERGE_PHASE_TIME.name());
  }

  /**
   * Get shuffle counter per source. Available in case of shuffle.
   *
   * @return Map<String, TezCounter> shuffle phase time at every counter group level
   */
  public final Map<String, TezCounter> getShufflePhaseTime() {
    return getCounter(null, TaskCounter.SHUFFLE_PHASE_TIME.name());
  }

  /**
   * Get OUTPUT_BYTES counter per source. Available in case of map outputs.
   *
   * @return Map<String, TezCounter> output bytes counter at every counter group level
   */
  public final Map<String, TezCounter> getTaskOutputBytes() {
    return getCounter(null, TaskCounter.OUTPUT_BYTES.name());
  }

  /**
   * Get the number of spills per source (SPILLED_RECORDS / OUTPUT_RECORDS).
   *
   * @return Map<String, Float> spill count details
   */
  public final Map<String, Float> getSpillCount() {
    Map<String, TezCounter> outputRecords = getCounter(null, "OUTPUT_RECORDS");
    Map<String, TezCounter> spilledRecords = getCounter(null, "SPILLED_RECORDS");
    Map<String, Float> result = Maps.newHashMap();
    for (Map.Entry<String, TezCounter> entry : spilledRecords.entrySet()) {
      String source = entry.getKey();
      long spilledVal = entry.getValue().getValue();
      long outputVal = outputRecords.get(source).getValue();
      result.put(source, (spilledVal * 1.0f) / (outputVal * 1.0f));
    }
    return result;
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("[");
    sb.append("taskAttemptId=").append(getTaskAttemptId()).append(", ");
    sb.append("creationTime=").append(getCreationTimeInterval()).append(", ");
    sb.append("startTime=").append(getStartTimeInterval()).append(", ");
    sb.append("finishTime=").append(getFinishTimeInterval()).append(", ");
    sb.append("timeTaken=").append(getTimeTaken()).append(", ");
    sb.append("events=").append(getEvents()).append(", ");
    sb.append("diagnostics=").append(getDiagnostics()).append(", ");
    sb.append("container=").append(getContainer()).append(", ");
    sb.append("nodeId=").append(getNodeId()).append(", ");
    sb.append("logURL=").append(getLogURL()).append(", ");
    sb.append("status=").append(getStatus());
    sb.append("]");
    return sb.toString();
  }
}
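// Added note: the anonymous Comparator in orderingOnAllocationTime above can
// be expressed more compactly on Java 8+ with identical semantics (smaller
// allocation times sort first, ties compare as 0). A sketch:
// requires java.util.Comparator
public static Ordering<TaskAttemptInfo> orderingOnAllocationTime() {
  return Ordering.from(Comparator.comparingLong(TaskAttemptInfo::getAllocationTime));
}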
@Override
public void readFields(DataInput in) throws IOException {
  queueName = StringInterner.weakIntern(Text.readString(in));
  operations = WritableUtils.readStringArray(in);
}
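// Added for symmetry (an assumption, not necessarily the original write()):
// the corresponding write side for the two fields read above.
@Override
public void write(DataOutput out) throws IOException {
  Text.writeString(out, queueName);
  WritableUtils.writeStringArray(out, operations);
}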
public class CriticalPathAnalyzer extends TezAnalyzerBase implements Analyzer {

  String succeededState = StringInterner.weakIntern(TaskAttemptState.SUCCEEDED.name());
  String failedState = StringInterner.weakIntern(TaskAttemptState.FAILED.name());

  public enum CriticalPathDependency {
    DATA_DEPENDENCY,
    INIT_DEPENDENCY,
    COMMIT_DEPENDENCY,
    RETRY_DEPENDENCY,
    OUTPUT_RECREATE_DEPENDENCY
  }

  public static final String DRAW_SVG = "tez.critical-path-analyzer.draw-svg";

  public static class CriticalPathStep {
    public enum EntityType {
      ATTEMPT,
      VERTEX_INIT,
      DAG_COMMIT
    }

    EntityType type;
    TaskAttemptInfo attempt;
    CriticalPathDependency reason; // reason linking this to the previous step on the critical path
    long startCriticalPathTime; // time at which attempt is on critical path
    long stopCriticalPathTime; // time at which attempt is off critical path
    List<String> notes = Lists.newLinkedList();

    public CriticalPathStep(TaskAttemptInfo attempt, EntityType type) {
      this.type = type;
      this.attempt = attempt;
    }

    public EntityType getType() {
      return type;
    }

    public TaskAttemptInfo getAttempt() {
      return attempt;
    }

    public long getStartCriticalTime() {
      return startCriticalPathTime;
    }

    public long getStopCriticalTime() {
      return stopCriticalPathTime;
    }

    public CriticalPathDependency getReason() {
      return reason;
    }

    public List<String> getNotes() {
      return notes;
    }
  }

  List<CriticalPathStep> criticalPath = Lists.newLinkedList();

  Map<String, TaskAttemptInfo> attempts = Maps.newHashMap();

  public CriticalPathAnalyzer() {}

  @Override
  public void analyze(DagInfo dagInfo) throws TezException {
    // get all attempts in the dag and find the last failed/succeeded attempt.
    // ignore killed attempts to handle kills that happen upon dag completion
    TaskAttemptInfo lastAttempt = null;
    long lastAttemptFinishTime = 0;
    for (VertexInfo vertex : dagInfo.getVertices()) {
      for (TaskInfo task : vertex.getTasks()) {
        for (TaskAttemptInfo attempt : task.getTaskAttempts()) {
          attempts.put(attempt.getTaskAttemptId(), attempt);
          if (attempt.getStatus().equals(succeededState)
              || attempt.getStatus().equals(failedState)) {
            if (lastAttemptFinishTime < attempt.getFinishTime()) {
              lastAttempt = attempt;
              lastAttemptFinishTime = attempt.getFinishTime();
            }
          }
        }
      }
    }

    if (lastAttempt == null) {
      System.out.println("Cannot find last attempt to finish in DAG " + dagInfo.getDagId());
      return;
    }

    createCriticalPath(dagInfo, lastAttempt, lastAttemptFinishTime, attempts);

    analyzeCriticalPath(dagInfo);

    if (getConf().getBoolean(DRAW_SVG, true)) {
      saveCriticalPathAsSVG(dagInfo);
    }
  }

  public List<CriticalPathStep> getCriticalPath() {
    return criticalPath;
  }

  private void saveCriticalPathAsSVG(DagInfo dagInfo) {
    SVGUtils svg = new SVGUtils();
    String outputFileName = getOutputDir() + File.separator + dagInfo.getDagId() + ".svg";
    System.out.println("Writing output to: " + outputFileName);
    svg.saveCriticalPathAsSVG(dagInfo, outputFileName, criticalPath);
  }

  private void analyzeAllocationOverhead(DagInfo dag) {
    List<TaskAttemptInfo> preemptedAttempts = Lists.newArrayList();
    for (VertexInfo v : dag.getVertices()) {
      for (TaskInfo t : v.getTasks()) {
        for (TaskAttemptInfo a : t.getTaskAttempts()) {
          if (a.getTerminationCause()
              .equals(TaskAttemptTerminationCause.INTERNAL_PREEMPTION.name())) {
            System.out.println("Found preempted attempt " + a.getTaskAttemptId());
            preemptedAttempts.add(a);
          }
        }
      }
    }
    for (int i = 0; i < criticalPath.size(); ++i) {
      CriticalPathStep step = criticalPath.get(i);
      TaskAttemptInfo attempt = step.attempt;
      if (step.getType() != EntityType.ATTEMPT) {
        continue;
      }

      long creationTime = attempt.getCreationTime();
      long allocationTime = attempt.getAllocationTime();
      if (allocationTime < step.startCriticalPathTime) {
        // allocated before it became critical
        continue;
      }

      // the attempt is critical before allocation. So allocation overhead needs analysis
      Container container = attempt.getContainer();
      if (container != null) {
        Collection<TaskAttemptInfo> attempts = dag.getContainerMapping().get(container);
        if (attempts != null && !attempts.isEmpty()) {
          // arrange attempts by allocation time
          List<TaskAttemptInfo> attemptsList = Lists.newArrayList(attempts);
          Collections.sort(attemptsList, TaskAttemptInfo.orderingOnAllocationTime());
          // walk the list to record allocation time before the current attempt
          long containerPreviousAllocatedTime = 0;
          for (TaskAttemptInfo containerAttempt : attemptsList) {
            if (containerAttempt.getTaskAttemptId().equals(attempt.getTaskAttemptId())) {
              break;
            }
            System.out.println(
                "Container: "
                    + container.getId()
                    + " running att: "
                    + containerAttempt.getTaskAttemptId()
                    + " wait att: "
                    + attempt.getTaskAttemptId());
            containerPreviousAllocatedTime += containerAttempt.getAllocationToEndTimeInterval();
          }
          if (containerPreviousAllocatedTime == 0) {
            step.notes.add("Container " + container.getId() + " newly allocated.");
          } else {
            if (containerPreviousAllocatedTime >= attempt.getCreationToAllocationTimeInterval()) {
              step.notes.add("Container " + container.getId() + " was fully allocated");
            } else {
              step.notes.add(
                  "Container "
                      + container.getId()
                      + " allocated for "
                      + SVGUtils.getTimeStr(containerPreviousAllocatedTime)
                      + " out of "
                      + SVGUtils.getTimeStr(attempt.getCreationToAllocationTimeInterval())
                      + " of allocation wait time");
            }
          }
        }
        // look for internal preemptions while the attempt was waiting for allocation
        for (TaskAttemptInfo a : preemptedAttempts) {
          if (a.getFinishTime() > creationTime && a.getFinishTime() < allocationTime) {
            // found an attempt that was preempted within this time interval
            step.notes.add("Potentially waited for preemption of " + a.getShortName());
          }
        }
      }
    }
  }

  private void analyzeStragglers(DagInfo dag) {
    long dagStartTime = dag.getStartTime();
    long dagTime = dag.getFinishTime() - dagStartTime;
    long totalAttemptCriticalTime = 0;
    for (int i = 0; i < criticalPath.size(); ++i) {
      CriticalPathStep step = criticalPath.get(i);
      totalAttemptCriticalTime += (step.stopCriticalPathTime - step.startCriticalPathTime);
      TaskAttemptInfo attempt = step.attempt;
      if (step.getType() == EntityType.ATTEMPT) {
        // analyze execution overhead
        if (attempt.getLastDataEvents().size() > 1) {
          // there were read errors. that could have delayed the attempt. ignore this
          continue;
        }
        long avgExecutionTime = attempt.getTaskInfo().getVertexInfo().getAvgExecutionTimeInterval();
        if (avgExecutionTime <= 0) {
          continue;
        }
        if (avgExecutionTime * 1.25 < attempt.getExecutionTimeInterval()) {
          step.notes.add(
              "Potential straggler. Execution time "
                  + SVGUtils.getTimeStr(attempt.getExecutionTimeInterval())
                  + " compared to vertex average of "
                  + SVGUtils.getTimeStr(avgExecutionTime));
        }
      }
    }
    System.out.println(
        "DAG time taken: "
            + dagTime
            + " TotalAttemptTime: "
            + totalAttemptCriticalTime
            + " DAG finish time: "
            + dag.getFinishTime()
            + " DAG start time: "
            + dagStartTime);
  }

  private void analyzeCriticalPath(DagInfo dag) {
    if (!criticalPath.isEmpty()) {
      analyzeStragglers(dag);
      analyzeAllocationOverhead(dag);
    }
  }

  private void createCriticalPath(
      DagInfo dagInfo,
      TaskAttemptInfo lastAttempt,
      long lastAttemptFinishTime,
      Map<String, TaskAttemptInfo> attempts) {
    List<CriticalPathStep> tempCP = Lists.newLinkedList();
    if (lastAttempt != null) {
      TaskAttemptInfo currentAttempt = lastAttempt;
      CriticalPathStep currentStep = new CriticalPathStep(currentAttempt, EntityType.DAG_COMMIT);
      long currentAttemptStopCriticalPathTime = lastAttemptFinishTime;

      // add the commit step
      currentStep.stopCriticalPathTime = dagInfo.getFinishTime();
      currentStep.startCriticalPathTime = currentAttemptStopCriticalPathTime;
      currentStep.reason = CriticalPathDependency.COMMIT_DEPENDENCY;
      tempCP.add(currentStep);

      while (true) {
        Preconditions.checkState(currentAttempt != null);
        Preconditions.checkState(currentAttemptStopCriticalPathTime > 0);
        System.out.println(
            "Step: " + tempCP.size() + " Attempt: " + currentAttempt.getTaskAttemptId());

        currentStep = new CriticalPathStep(currentAttempt, EntityType.ATTEMPT);
        currentStep.stopCriticalPathTime = currentAttemptStopCriticalPathTime;

        // consider the last data event seen immediately preceding the current critical path
        // stop time for this attempt
        long currentStepLastDataEventTime = 0;
        String currentStepLastDataTA = null;
        DataDependencyEvent item =
            currentAttempt.getLastDataEventInfo(currentStep.stopCriticalPathTime);
        if (item != null) {
          currentStepLastDataEventTime = item.getTimestamp();
          currentStepLastDataTA = item.getTaskAttemptId();
        }

        // sanity check
        for (CriticalPathStep previousStep : tempCP) {
          if (previousStep.type == EntityType.ATTEMPT) {
            if (previousStep.attempt.getTaskAttemptId().equals(currentAttempt.getTaskAttemptId())) {
              // found a loop.
              // this should only happen for read errors in currentAttempt
              List<DataDependencyEvent> dataEvents = currentAttempt.getLastDataEvents();
              // received original and retry data events
              Preconditions.checkState(dataEvents.size() > 1);
              // the new event is earlier than the last
              Preconditions.checkState(
                  currentStepLastDataEventTime
                      < dataEvents.get(dataEvents.size() - 1).getTimestamp());
            }
          }
        }

        tempCP.add(currentStep);

        // find the next attempt on the critical path
        boolean dataDependency = false;
        // find out predecessor dependency
        if (currentStepLastDataEventTime > currentAttempt.getCreationTime()) {
          dataDependency = true;
        }

        long startCriticalPathTime = 0;
        String nextAttemptId = null;
        CriticalPathDependency reason = null;
        if (dataDependency) {
          // last data event was produced after the attempt was scheduled. use data dependency
          // typically the case when scheduling ahead of time
          System.out.println("Has data dependency");
          if (!Strings.isNullOrEmpty(currentStepLastDataTA)) {
            // there is a valid data causal TA. Use it.
            nextAttemptId = currentStepLastDataTA;
            reason = CriticalPathDependency.DATA_DEPENDENCY;
            startCriticalPathTime = currentStepLastDataEventTime;
            System.out.println("Using data dependency " + nextAttemptId);
          } else {
            // there is no valid data causal TA. This means the data event came from the same
            // vertex
            VertexInfo vertex = currentAttempt.getTaskInfo().getVertexInfo();
            Preconditions.checkState(
                !vertex.getAdditionalInputInfoList().isEmpty(),
                "Vertex: "
                    + vertex.getVertexId()
                    + " has no external inputs but the last data event "
                    + "TA is null for "
                    + currentAttempt.getTaskAttemptId());
            nextAttemptId = null;
            reason = CriticalPathDependency.INIT_DEPENDENCY;
            System.out.println("Using init dependency");
          }
        } else {
          // attempt was scheduled after the last data event. use scheduling dependency
          // typically happens for retries
          System.out.println("Has scheduling dependency");
          if (!Strings.isNullOrEmpty(currentAttempt.getCreationCausalTA())) {
            // there is a scheduling causal TA. Use it.
            nextAttemptId = currentAttempt.getCreationCausalTA();
            reason = CriticalPathDependency.RETRY_DEPENDENCY;
            TaskAttemptInfo nextAttempt = attempts.get(nextAttemptId);
            // note: the original checked nextAttemptId != null here, which is always true at
            // this point; the meaningful guard is on the looked-up attempt
            if (nextAttempt != null) {
              VertexInfo currentVertex = currentAttempt.getTaskInfo().getVertexInfo();
              VertexInfo nextVertex = nextAttempt.getTaskInfo().getVertexInfo();
              if (!nextVertex.getVertexName().equals(currentVertex.getVertexName())) {
                // cause from a different vertex. Might be a rerun to re-generate outputs
                for (VertexInfo outVertex : currentVertex.getOutputVertices()) {
                  if (nextVertex.getVertexName().equals(outVertex.getVertexName())) {
                    // next vertex is an output vertex
                    reason = CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY;
                    break;
                  }
                }
              }
            }
            if (reason == CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY) {
              // rescheduled due to read error. start critical at read error report time.
              // for now proxy own creation time for read error report time
              startCriticalPathTime = currentAttempt.getCreationTime();
            } else {
              // rescheduled due to own previous attempt failure
              // we are critical when the previous attempt fails
              Preconditions.checkState(nextAttempt != null);
              Preconditions.checkState(
                  nextAttempt
                      .getTaskInfo()
                      .getTaskId()
                      .equals(currentAttempt.getTaskInfo().getTaskId()));
              startCriticalPathTime = nextAttempt.getFinishTime();
            }
            System.out.println("Using scheduling dependency " + nextAttemptId);
          } else {
            // there is no scheduling causal TA.
            if (!Strings.isNullOrEmpty(currentStepLastDataTA)) {
              // there is a data event going to the vertex. Count the time between the data
              // event and the creation time as Initializer/Manager overhead and follow the
              // data dependency
              nextAttemptId = currentStepLastDataTA;
              reason = CriticalPathDependency.DATA_DEPENDENCY;
              startCriticalPathTime = currentStepLastDataEventTime;
              long overhead = currentAttempt.getCreationTime() - currentStepLastDataEventTime;
              currentStep.notes.add(
                  "Initializer/VertexManager scheduling overhead "
                      + SVGUtils.getTimeStr(overhead));
              System.out.println("Using data dependency " + nextAttemptId);
            } else {
              // there is no scheduling causal TA and no data event causal TA.
              // the vertex has external input that sent the last data events
              // or the vertex has external input but does not use events
              // or the vertex has no external inputs or edges
              nextAttemptId = null;
              reason = CriticalPathDependency.INIT_DEPENDENCY;
              System.out.println("Using init dependency");
            }
          }
        }

        currentStep.startCriticalPathTime = startCriticalPathTime;
        currentStep.reason = reason;

        Preconditions.checkState(
            currentStep.stopCriticalPathTime >= currentStep.startCriticalPathTime);

        if (Strings.isNullOrEmpty(nextAttemptId)) {
          Preconditions.checkState(reason.equals(CriticalPathDependency.INIT_DEPENDENCY));
          Preconditions.checkState(startCriticalPathTime == 0);
          // no predecessor attempt found. this is the last step in the critical path
          // assume the attempt's start critical path time is when it is scheduled. before
          // that is vertex initialization time
          currentStep.startCriticalPathTime = currentStep.attempt.getCreationTime();

          // add the vertex init step
          long initStepStopCriticalTime = currentStep.startCriticalPathTime;
          currentStep = new CriticalPathStep(currentAttempt, EntityType.VERTEX_INIT);
          currentStep.stopCriticalPathTime = initStepStopCriticalTime;
          currentStep.startCriticalPathTime = dagInfo.getStartTime();
          currentStep.reason = CriticalPathDependency.INIT_DEPENDENCY;
          tempCP.add(currentStep);

          if (!tempCP.isEmpty()) {
            for (int i = tempCP.size() - 1; i >= 0; --i) {
              criticalPath.add(tempCP.get(i));
            }
          }
          return;
        }

        currentAttempt = attempts.get(nextAttemptId);
        currentAttemptStopCriticalPathTime = startCriticalPathTime;
      }
    }
  }

  @Override
  public CSVResult getResult() throws TezException {
    String[] headers = {
      "Entity", "PathReason", "Status", "CriticalStartTime", "CriticalStopTime", "Notes"
    };
    CSVResult csvResult = new CSVResult(headers);

    for (CriticalPathStep step : criticalPath) {
      String entity =
          (step.getType() == EntityType.ATTEMPT
              ? step.getAttempt().getTaskAttemptId()
              : (step.getType() == EntityType.VERTEX_INIT
                  ? step.attempt.getTaskInfo().getVertexInfo().getVertexName()
                  : "DAG COMMIT"));
      String[] record = {
        entity,
        step.getReason().name(),
        step.getAttempt().getDetailedStatus(),
        String.valueOf(step.getStartCriticalTime()),
        String.valueOf(step.getStopCriticalTime()),
        Joiner.on(";").join(step.getNotes())
      };
      csvResult.addRecord(record);
    }
    return csvResult;
  }

  @Override
  public String getName() {
    return "CriticalPathAnalyzer";
  }

  @Override
  public String getDescription() {
    return "Analyze critical path of the DAG";
  }

  @Override
  public Configuration getConfiguration() {
    return getConf();
  }

  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new Configuration(), new CriticalPathAnalyzer(), args);
    System.exit(res);
  }
}
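// Added usage sketch: driving the analyzer programmatically instead of via
// main(). Assumes a parsed DagInfo is available (e.g. from a Tez history
// dump) and that TezAnalyzerBase inherits setConf from Configured (implied by
// the ToolRunner-based main above); details are illustrative.
static void runCriticalPathAnalyzer(DagInfo dagInfo) throws TezException {
  Configuration conf = new Configuration();
  conf.setBoolean(CriticalPathAnalyzer.DRAW_SVG, false); // skip SVG generation

  CriticalPathAnalyzer analyzer = new CriticalPathAnalyzer();
  analyzer.setConf(conf);
  analyzer.analyze(dagInfo);

  // steps come back ordered from DAG start to DAG commit
  for (CriticalPathAnalyzer.CriticalPathStep step : analyzer.getCriticalPath()) {
    System.out.println(step.getType() + " -> " + step.getReason());
  }
}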
public static String getFrameworkClasspath(Configuration conf, boolean usingArchive) {
  StringBuilder classpathBuilder = new StringBuilder();
  boolean userClassesTakesPrecedence =
      conf.getBoolean(
          TezConfiguration.TEZ_USER_CLASSPATH_FIRST,
          TezConfiguration.TEZ_USER_CLASSPATH_FIRST_DEFAULT);
  if (userClassesTakesPrecedence) {
    addUserSpecifiedClasspath(classpathBuilder, conf);
  }

  String[] tezLibUrisClassPath = conf.getStrings(TezConfiguration.TEZ_LIB_URIS_CLASSPATH);
  if (!conf.getBoolean(TezConfiguration.TEZ_IGNORE_LIB_URIS, false)
      && tezLibUrisClassPath != null
      && tezLibUrisClassPath.length != 0) {
    for (String c : tezLibUrisClassPath) {
      classpathBuilder.append(c.trim()).append(File.pathSeparator);
    }
  } else {
    if (conf.getBoolean(TezConfiguration.TEZ_IGNORE_LIB_URIS, false)) {
      LOG.info(
          "Ignoring '"
              + TezConfiguration.TEZ_LIB_URIS
              + "' since '"
              + TezConfiguration.TEZ_IGNORE_LIB_URIS
              + "' is set to true ");
    }

    // Legacy: Next add the tez libs, if specified via an archive.
    if (usingArchive) {
      // Add PWD/tezlib/*
      classpathBuilder
          .append(Environment.PWD.$())
          .append(File.separator)
          .append(TezConstants.TEZ_TAR_LR_NAME)
          .append(File.separator)
          .append("*")
          .append(File.pathSeparator);

      // Legacy: Add PWD/tezlib/lib/*
      classpathBuilder
          .append(Environment.PWD.$())
          .append(File.separator)
          .append(TezConstants.TEZ_TAR_LR_NAME)
          .append(File.separator)
          .append("lib")
          .append(File.separator)
          .append("*")
          .append(File.pathSeparator);
    }
  }

  // Last add HADOOP_CLASSPATH, if it's required.
  if (conf.getBoolean(
      TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS,
      TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS_DEFAULT)) {
    for (String c :
        conf.getStrings(
            YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
      classpathBuilder.append(c.trim()).append(File.pathSeparator);
    }
  } else {
    // Setup HADOOP_CONF_DIR after PWD and tez-libs, if it's required.
    classpathBuilder.append(Environment.HADOOP_CONF_DIR.$()).append(File.pathSeparator);
  }

  if (!userClassesTakesPrecedence) {
    addUserSpecifiedClasspath(classpathBuilder, conf);
  }

  String classpath = classpathBuilder.toString();
  return StringInterner.weakIntern(classpath);
}
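// Added call-site sketch (names and values are illustrative, not from the
// original source). Shows how the user-classpath ordering flag changes the
// result of the method above: true puts user-specified entries first, false
// appends them after the framework and Hadoop entries.
static String buildClasspathExample() {
  Configuration conf = new Configuration();
  conf.setBoolean(TezConfiguration.TEZ_USER_CLASSPATH_FIRST, true);
  return getFrameworkClasspath(conf, true /* usingArchive */);
}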
public static void replaceInEnv(Map<String, String> env, String key, String value) {
  env.put(StringInterner.weakIntern(key), StringInterner.weakIntern(value));
}
private static void putIfAbsent(Map<String, String> env, String key, String value) {
  if (!env.containsKey(key)) {
    env.put(StringInterner.weakIntern(key), StringInterner.weakIntern(value));
  }
}
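// Added note: since Java 8, java.util.Map offers putIfAbsent directly. A
// near-equivalent one-liner (it differs only when a key is mapped to null,
// which the built-in treats as absent while the containsKey check above does
// not):
static void putIfAbsentJava8(Map<String, String> env, String key, String value) {
  env.putIfAbsent(StringInterner.weakIntern(key), StringInterner.weakIntern(value));
}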