Example #1
0
  /**
   * Wait for an open slot, and then start the Flow running, returning the corresponding FlowFuture.
   *
   * <p>Done flows are pruned (and their stats cleared) on every pass, so this call also acts as
   * the garbage collector for completed entries in {@code _flowFutures}.
   *
   * <p>FUTURE - we could make FlowFuture something where the flow doesn't start until you call
   * FlowFuture.start(). Then this routine could immediately return the FlowFuture, and start it
   * when we drop under {@code _maxFlows}. But that would require an async executor to constantly
   * be polling the running flows, determining when they are done.
   *
   * @param flow the Flow to run
   * @return FlowFuture wrapping the now-running flow
   * @throws InterruptedException if the calling thread is interrupted while sleeping between
   *     polls for an open slot
   */
  public FlowFuture addFlow(Flow flow) throws InterruptedException {

    // Find an open spot, or loop until we get one.
    while (true) {
      synchronized (_flowFutures) {
        // Prune completed flows, clearing their stats first so Cascading
        // releases the per-task data it would otherwise hang on to.
        Iterator<FlowFuture> iter = _flowFutures.iterator();
        while (iter.hasNext()) {
          FlowFuture ff = iter.next();
          if (ff.isDone()) {
            clearStats(ff);
            iter.remove();
          }
        }

        // Now that we've removed any flows that are done, see if we
        // can add the new flow.
        if (_flowFutures.size() < _maxFlows) {
          FlowFuture ff = new FlowFuture(flow);
          _flowFutures.add(ff);
          return ff;
        }
      }

      // No open slots, so loop. Sleep outside the synchronized block so other
      // threads can add/inspect flows while we wait.
      Thread.sleep(FLOW_CHECK_INTERVAL);
    }
  }
Example #2
0
  /**
   * Cascading will hang on to the HadoopStepStats for every flow, until we get rid of them. So
   * we'll clear out stats once a FlowFuture is done.
   *
   * <p>Only Hadoop-backed steps are touched; local steps carry no retained task stats.
   *
   * @param ff completed FlowFuture whose per-task stats should be released
   */
  private void clearStats(FlowFuture ff) {
    Flow flow = ff.getFlow();

    List<FlowStep> flowSteps = flow.getFlowSteps();
    for (FlowStep flowStep : flowSteps) {
      FlowStepStats stepStats = flowStep.getFlowStepStats();

      if (stepStats instanceof HadoopStepStats) {
        HadoopStepStats hadoopSS = (HadoopStepStats) stepStats;
        // Dropping the task stats map is what actually frees the memory.
        hadoopSS.getTaskStats().clear();
      }
    }
  }
Example #3
0
  /**
   * Return true if all of the flows are done running.
   *
   * <p>As a side effect, completed flows are pruned from the tracked set, with their Hadoop task
   * stats cleared first (matching {@code addFlow}) so Cascading doesn't keep them alive.
   *
   * @return true if no tracked flow is still running
   */
  public boolean isDone() {
    synchronized (_flowFutures) {
      Iterator<FlowFuture> iter = _flowFutures.iterator();
      while (iter.hasNext()) {
        FlowFuture ff = iter.next();
        if (ff.isDone()) {
          // Release Cascading's per-task stats before dropping our reference;
          // otherwise the stats remain reachable and leak (see clearStats).
          clearStats(ff);
          iter.remove();
        } else {
          return false;
        }
      }
    }

    // Nothing still running, so we're all done.
    return true;
  }
Example #4
0
  /**
   * Stop the stats-reporting thread (if any) and terminate all running flows.
   *
   * <p>Completed flows are pruned (with their stats cleared, matching {@code addFlow}); flows
   * still running are cancelled with interruption.
   */
  public void terminate() {
    // NOTE(review): synchronizing on a field that is then nulled is fragile —
    // another thread entering after the null-assignment would NPE on the
    // synchronized statement. Kept as-is since a proper fix needs a dedicated
    // lock object declared outside this method.
    if (_statsThread != null) {
      synchronized (_statsThread) {
        // Somebody might have cleared the thread
        if ((_statsThread != null) && _statsThread.isAlive()) {
          _statsThread.interrupt();
          _statsThread = null;
        }
      }
    }

    // Now terminate all of the running flows.
    synchronized (_flowFutures) {
      Iterator<FlowFuture> iter = _flowFutures.iterator();
      while (iter.hasNext()) {
        FlowFuture ff = iter.next();
        if (ff.isDone()) {
          // Consistent with addFlow(): clear retained Hadoop task stats
          // before forgetting the future, so they can be garbage collected.
          clearStats(ff);
          iter.remove();
        } else {
          ff.cancel(true);
        }
      }
    }
  }
Example #5
0
 /**
  * Run the given Flow synchronously and return its result.
  *
  * @param flow Flow to execute
  * @return Result of running the flow.
  * @throws InterruptedException if the calling thread is interrupted while waiting
  * @throws ExecutionException if the flow fails with an exception
  */
 public static FlowResult run(Flow flow) throws InterruptedException, ExecutionException {
   // Block until the flow completes and hand back its result directly.
   return new FlowFuture(flow).get();
 }
Example #6
0
  /**
   * Accumulate per-step task statistics for the given flow into {@code taskCounts}.
   *
   * <p>For Hadoop steps, each child slice is inspected: successful slices contribute their
   * slot-milliseconds, while running slices contribute a mapper or reducer count of one. Local
   * steps are counted as one mapper (plus one reducer when the step has a grouping).
   *
   * @param ff FlowFuture whose flow's stats should be collected
   * @param taskCounts map from "&lt;flowId&gt;-&lt;stepId&gt;" to accumulated TaskStats
   * @throws RuntimeException if a step reports an unknown FlowStepStats subtype
   */
  private void collectStats(FlowFuture ff, Map<String, TaskStats> taskCounts) {
    Flow flow = ff.getFlow();

    String flowId = flow.getID();
    String flowName = flow.getName();

    List<FlowStep> flowSteps = flow.getFlowSteps();
    for (FlowStep flowStep : flowSteps) {
      FlowStepStats stepStats = flowStep.getFlowStepStats();

      String stepId = flowStep.getID();
      String stepName = flowStep.getName();

      String countsKey = String.format("%s-%s", flowId, stepId);
      if (stepStats instanceof HadoopStepStats) {
        HadoopStepStats hadoopSS = (HadoopStepStats) stepStats;
        // We don't want/need info on task attempts
        hadoopSS.captureDetail(false);

        // We have one child for every task. We have to see if it's
        // running, and if so, whether it's a mapper or reducer
        for (HadoopSliceStats sliceStats : hadoopSS.getChildren()) {
          if (sliceStats.getStatus() == Status.SUCCESSFUL) {
            // Set the total time
            // TODO this doesn't seem to be working, I get 0.
            incrementCounts(
                taskCounts,
                countsKey,
                flowName,
                stepName,
                0,
                0,
                sliceStats.getCounterValue(JobInProgress.Counter.SLOTS_MILLIS_MAPS),
                sliceStats.getCounterValue(JobInProgress.Counter.SLOTS_MILLIS_REDUCES));
          } else if (sliceStats.getStatus() == Status.RUNNING) {
            if (sliceStats.getKind() == Kind.MAPPER) {
              incrementCounts(taskCounts, countsKey, flowName, stepName, 1, 0, 0, 0);
            } else if (sliceStats.getKind() == Kind.REDUCER) {
              incrementCounts(taskCounts, countsKey, flowName, stepName, 0, 1, 0, 0);
            }
          }
        }
      } else if (stepStats instanceof LocalStepStats) {
        stepStats.captureDetail();

        // map & reduce kind of run as one, so just add one to both if there's a group.
        incrementCounts(taskCounts, countsKey, flowName, stepName, 1, 0, 0, 0);
        if (!flowStep.getGroups().isEmpty()) {
          incrementCounts(taskCounts, countsKey, flowName, stepName, 0, 1, 0, 0);
        }
      } else {
        throw new RuntimeException(
            "Unknown type returned by FlowStep.getFlowStepStats: " + stepStats.getClass());
      }
    }
  }