Example #1
  @JsonIgnore
  public void setJobStats(HadoopStepStats stats) {
    Map<String, Long> counterNameToValue = counterGroupInfoMapHelper(stats);

    // job metrics
    int totalMappers = stats.getNumMapTasks();
    int totalReducers = stats.getNumReduceTasks();
    Map<String, Number> metrics = new HashMap<String, Number>();
    metrics.put("numberMaps", totalMappers);
    metrics.put("numberReduces", totalReducers);
    metrics.put(
        "avgMapTime",
        getAvgCounterValue(counterNameToValue, MetricsCounter.SLOTS_MILLIS_MAPS, totalMappers));
    metrics.put(
        "avgReduceTime",
        getAvgCounterValue(counterNameToValue, MetricsCounter.SLOTS_MILLIS_REDUCES, totalReducers));
    metrics.put(
        "bytesWritten", getCounterValue(counterNameToValue, MetricsCounter.FILE_BYTES_WRITTEN));
    metrics.put(
        "hdfsBytesWritten", getCounterValue(counterNameToValue, MetricsCounter.HDFS_BYTES_WRITTEN));
    metrics.put(
        "mapInputRecords", getCounterValue(counterNameToValue, MetricsCounter.MAP_INPUT_RECORDS));
    metrics.put(
        "mapOutputRecords", getCounterValue(counterNameToValue, MetricsCounter.MAP_OUTPUT_RECORDS));
    metrics.put(
        "proactiveSpillCountRecs",
        getCounterValue(counterNameToValue, MetricsCounter.SPILLED_RECORDS));
    metrics.put(
        "reduceInputRecords",
        getCounterValue(counterNameToValue, MetricsCounter.REDUCE_INPUT_RECORDS));
    metrics.put(
        "reduceOutputRecords",
        getCounterValue(counterNameToValue, MetricsCounter.REDUCE_OUTPUT_RECORDS));
    setMetrics(metrics);
  }
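The getCounterValue and getAvgCounterValue helpers are referenced above but not shown in these examples. A minimal sketch, assuming MetricsCounter is an enum that can report the Hadoop counter group it belongs to (getGroupName() is a hypothetical accessor) and that map keys follow the group::counter convention from Example #3:

  private long getCounterValue(Map<String, Long> counterNameToValue, MetricsCounter counter) {
    // The "group::counter" key format matches counterGroupInfoMapHelper() in
    // Example #3; getGroupName() is a hypothetical accessor on MetricsCounter
    Long value = counterNameToValue.get(counter.getGroupName() + "::" + counter.name());
    return (value == null) ? 0L : value;
  }

  private long getAvgCounterValue(
      Map<String, Long> counterNameToValue, MetricsCounter counter, int numTasks) {
    // Guard against division by zero for steps with no tasks of this kind
    return (numTasks == 0) ? 0L : getCounterValue(counterNameToValue, counter) / numTasks;
  }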
Example #2
  /**
   * Cascading will hang on to the HadoopStepStats for every flow until we get rid of them, so
   * we clear out the stats once a FlowFuture is done.
   */
  private void clearStats(FlowFuture ff) {
    Flow flow = ff.getFlow();

    List<FlowStep> flowSteps = flow.getFlowSteps();
    for (FlowStep flowStep : flowSteps) {
      FlowStepStats stepStats = flowStep.getFlowStepStats();

      if (stepStats instanceof HadoopStepStats) {
        HadoopStepStats hadoopSS = (HadoopStepStats) stepStats;
        hadoopSS.getTaskStats().clear();
      }
    }
  }
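A sketch of how clearStats() might be driven, assuming FlowFuture follows the java.util.concurrent.Future contract so isDone() is available; the monitor() wrapper and the polling interval are illustrative only:

  private void monitor(FlowFuture ff, Map<String, TaskStats> taskCounts)
      throws InterruptedException {
    while (!ff.isDone()) {
      collectStats(ff, taskCounts); // see Example #4
      Thread.sleep(1000L); // arbitrary poll interval
    }
    // Once the flow is done, release Cascading's cached per-task stats
    clearStats(ff);
  }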
Example #3
  /**
   * Helper method for counter group map retrieval.
   *
   * @param stats the HadoopStepStats to pull counter values from
   * @return a map of counter name to counter value
   */
  private Map<String, Long> counterGroupInfoMapHelper(HadoopStepStats stats) {
    Counters counters = new Counters();
    Map<String, Long> counterNameToValue = new HashMap<String, Long>();
    for (String groupName : stats.getCounterGroups()) { // retrieving groups
      for (String counterName :
          stats.getCountersFor(groupName)) { // retrieving counters in that group
        Long counterValue = stats.getCounterValue(groupName, counterName);
        counterNameToValue.put(groupName + "::" + counterName, counterValue);

        // mirror the value into a Hadoop Counters object for the group map below
        Counter counter = counters.findCounter(groupName, counterName);
        counter.setValue(counterValue);
      }
    }
    setCounterGroupMap(CounterGroup.counterGroupInfoMap(counters));
    return counterNameToValue;
  }
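Because keys are built as groupName + "::" + counterName, a caller can look up a specific Hadoop counter directly in the returned map. A usage sketch; note that the FileSystemCounters group name shown here is specific to classic (pre-YARN) Hadoop:

  Map<String, Long> values = counterGroupInfoMapHelper(stats);
  // Filesystem counters live in the "FileSystemCounters" group on old Hadoop
  Long hdfsBytesWritten = values.get("FileSystemCounters::HDFS_BYTES_WRITTEN");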
Example #4
  private void collectStats(FlowFuture ff, Map<String, TaskStats> taskCounts) {
    Flow flow = ff.getFlow();

    String flowId = flow.getID();
    String flowName = flow.getName();

    List<FlowStep> flowSteps = flow.getFlowSteps();
    for (FlowStep flowStep : flowSteps) {
      FlowStepStats stepStats = flowStep.getFlowStepStats();

      String stepId = flowStep.getID();
      String stepName = flowStep.getName();

      String countsKey = String.format("%s-%s", flowId, stepId);
      if (stepStats instanceof HadoopStepStats) {
        HadoopStepStats hadoopSS = (HadoopStepStats) stepStats;
        // We don't want/need info on task attempts
        hadoopSS.captureDetail(false);

        // We have one child for every task. Check whether it finished successfully
        // or is still running, and for running tasks whether it's a mapper or reducer
        Iterator<HadoopSliceStats> iter = hadoopSS.getChildren().iterator();
        while (iter.hasNext()) {
          HadoopSliceStats sliceStats = iter.next();

          if (sliceStats.getStatus() == Status.SUCCESSFUL) {
            // Add this task's slot time to the totals
            // TODO this doesn't seem to be working, I get 0.
            incrementCounts(
                taskCounts,
                countsKey,
                flowName,
                stepName,
                0,
                0,
                sliceStats.getCounterValue(JobInProgress.Counter.SLOTS_MILLIS_MAPS),
                sliceStats.getCounterValue(JobInProgress.Counter.SLOTS_MILLIS_REDUCES));
          } else if (sliceStats.getStatus() == Status.RUNNING) {
            if (sliceStats.getKind() == Kind.MAPPER) {
              incrementCounts(taskCounts, countsKey, flowName, stepName, 1, 0, 0, 0);
            } else if (sliceStats.getKind() == Kind.REDUCER) {
              incrementCounts(taskCounts, countsKey, flowName, stepName, 0, 1, 0, 0);
            }
          }
        }
      } else if (stepStats instanceof LocalStepStats) {
        stepStats.captureDetail();

        // map & reduce kind of run as one, so just add one to both if there's a group.
        incrementCounts(taskCounts, countsKey, flowName, stepName, 1, 0, 0, 0);
        if (flowStep.getGroups().size() > 0) {
          incrementCounts(taskCounts, countsKey, flowName, stepName, 0, 1, 0, 0);
        }
      } else {
        throw new RuntimeException(
            "Unknown type returned by FlowStep.getFlowStepStats: " + stepStats.getClass());
      }
    }
  }
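The incrementCounts() helper and the TaskStats type are referenced above but not shown. A plausible sketch of the accumulator, where TaskStats and its fields are assumptions; only the call signature is taken from the examples:

  // Hypothetical mutable holder for per-step task counts and slot times;
  // the field names are assumptions, not part of the original examples.
  private static class TaskStats {
    String flowName;
    String stepName;
    int mapCount;
    int reduceCount;
    long mapTime;
    long reduceTime;

    TaskStats(String flowName, String stepName) {
      this.flowName = flowName;
      this.stepName = stepName;
    }
  }

  private void incrementCounts(
      Map<String, TaskStats> taskCounts,
      String countsKey,
      String flowName,
      String stepName,
      int mapDelta,
      int reduceDelta,
      long mapTimeDelta,
      long reduceTimeDelta) {
    // Lazily create the accumulator for this "<flowId>-<stepId>" key
    TaskStats stats = taskCounts.get(countsKey);
    if (stats == null) {
      stats = new TaskStats(flowName, stepName);
      taskCounts.put(countsKey, stats);
    }
    stats.mapCount += mapDelta;
    stats.reduceCount += reduceDelta;
    stats.mapTime += mapTimeDelta;
    stats.reduceTime += reduceTimeDelta;
  }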