Пример #1
0
  /**
   * Connects this job vertex to the intermediate results it consumes.
   *
   * <p>For every input edge of the underlying {@code jobVertex}, the matching {@link
   * IntermediateResult} is looked up by the edge's source ID, appended to this vertex's inputs,
   * asked for a new consumer index, and then passed to each of the first {@code parallelism}
   * entries of {@code taskVertices} via {@code connectSource}.
   *
   * @param intermediateDataSets the intermediate results available for lookup, keyed by data set
   *     ID
   * @throws JobException if no intermediate result is found for an edge's source ID
   */
  public void connectToPredecessors(
      Map<IntermediateDataSetID, IntermediateResult> intermediateDataSets) throws JobException {

    final List<JobEdge> jobEdges = jobVertex.getInputs();

    if (LOG.isDebugEnabled()) {
      final String summary =
          String.format(
              "Connecting ExecutionJobVertex %s (%s) to %d predecessors.",
              jobVertex.getID(), jobVertex.getName(), jobEdges.size());
      LOG.debug(summary);
    }

    for (int inputIndex = 0; inputIndex < jobEdges.size(); inputIndex++) {
      final JobEdge edge = jobEdges.get(inputIndex);

      if (LOG.isDebugEnabled()) {
        // The edge either carries a resolved source (so its producer can be named) or only
        // the ID of the result it refers to.
        final String message;
        if (edge.getSource() != null) {
          message =
              String.format(
                  "Connecting input %d of vertex %s (%s) to intermediate result referenced via predecessor %s (%s).",
                  inputIndex,
                  jobVertex.getID(),
                  jobVertex.getName(),
                  edge.getSource().getProducer().getID(),
                  edge.getSource().getProducer().getName());
        } else {
          message =
              String.format(
                  "Connecting input %d of vertex %s (%s) to intermediate result referenced via ID %s.",
                  inputIndex, jobVertex.getID(), jobVertex.getName(), edge.getSourceId());
        }
        LOG.debug(message);
      }

      // Look the result up by ID. A missing entry means it has not been created, or this
      // method is not being called for the job vertices in topological order.
      final IntermediateResult upstream = intermediateDataSets.get(edge.getSourceId());
      if (upstream == null) {
        throw new JobException(
            "Cannot connect this job graph to the previous graph. No previous intermediate result found for ID "
                + edge.getSourceId());
      }

      this.inputs.add(upstream);

      // One consumer index per edge, shared by all subtasks connected below.
      final int consumerIndex = upstream.registerConsumer();

      for (int subtask = 0; subtask < parallelism; subtask++) {
        taskVertices[subtask].connectSource(inputIndex, upstream, edge, consumerIndex);
      }
    }
  }
Пример #2
0
  /**
   * Builds the execution-time counterpart of a {@link JobVertex}.
   *
   * <p>In order, the constructor: resolves the parallelism (the vertex's own value if positive,
   * otherwise {@code defaultParallelism}), checks it against the vertex's maximum parallelism,
   * serializes the task information, creates one {@link IntermediateResult} per produced data
   * set and one {@link ExecutionVertex} per subtask, verifies the partition assignment, and
   * finally creates the input splits if the vertex has an input split source.
   *
   * @param graph the execution graph this vertex belongs to; must not be null
   * @param jobVertex the job vertex to represent; must not be null
   * @param defaultParallelism parallelism used when the job vertex's own parallelism is not
   *     positive
   * @param timeout passed through to every created {@link ExecutionVertex}
   * @param createTimestamp passed through to every created {@link ExecutionVertex}
   * @throws NullPointerException if {@code graph} or {@code jobVertex} is null
   * @throws JobException if a co-location group is set without a slot sharing group, or if
   *     creating the input splits fails for any reason
   * @throws IOException declared by the constructor; presumably raised while serializing the
   *     task information (TODO confirm against {@code SerializedValue})
   */
  public ExecutionJobVertex(
      ExecutionGraph graph,
      JobVertex jobVertex,
      int defaultParallelism,
      Time timeout,
      long createTimestamp)
      throws JobException, IOException {

    if (graph == null) {
      throw new NullPointerException();
    }
    if (jobVertex == null) {
      throw new NullPointerException();
    }

    this.graph = graph;
    this.jobVertex = jobVertex;

    // A non-positive parallelism on the job vertex means "use the default".
    final int configuredParallelism = jobVertex.getParallelism();
    final int subtaskCount;
    if (configuredParallelism > 0) {
      subtaskCount = configuredParallelism;
    } else {
      subtaskCount = defaultParallelism;
    }

    this.parallelism = subtaskCount;

    final int configuredMaxParallelism = jobVertex.getMaxParallelism();
    final String maxParallelismMessage =
        "The maximum parallelism ("
            + configuredMaxParallelism
            + ") must be greater or equal than the parallelism ("
            + parallelism
            + ").";

    Preconditions.checkArgument(configuredMaxParallelism >= parallelism, maxParallelismMessage);
    this.maxParallelism = configuredMaxParallelism;

    final TaskInformation taskInformation =
        new TaskInformation(
            jobVertex.getID(),
            jobVertex.getName(),
            parallelism,
            maxParallelism,
            jobVertex.getInvokableClassName(),
            jobVertex.getConfiguration());
    this.serializedTaskInformation = new SerializedValue<>(taskInformation);

    this.taskVertices = new ExecutionVertex[subtaskCount];

    this.inputs = new ArrayList<IntermediateResult>(jobVertex.getInputs().size());

    // Adopt the vertex's slot sharing and co-location groups.
    this.slotSharingGroup = jobVertex.getSlotSharingGroup();
    this.coLocationGroup = jobVertex.getCoLocationGroup();

    // Co-location is only accepted together with slot sharing.
    if (slotSharingGroup == null && coLocationGroup != null) {
      throw new JobException("Vertex uses a co-location constraint without using slot sharing");
    }

    // One intermediate result per data set produced by the job vertex.
    this.producedDataSets =
        new IntermediateResult[jobVertex.getNumberOfProducedIntermediateDataSets()];

    for (int idx = 0; idx < jobVertex.getProducedDataSets().size(); idx++) {
      final IntermediateDataSet dataSet = jobVertex.getProducedDataSets().get(idx);
      final IntermediateResult produced =
          new IntermediateResult(dataSet.getId(), this, subtaskCount, dataSet.getResultType());

      this.producedDataSets[idx] = produced;
    }

    // Fall back to the option's default when the graph has no job configuration.
    final Configuration jobConfiguration = graph.getJobConfiguration();
    final int attemptHistoryLimit;
    if (jobConfiguration != null) {
      attemptHistoryLimit = jobConfiguration.getInteger(JobManagerOptions.MAX_ATTEMPTS_HISTORY_SIZE);
    } else {
      attemptHistoryLimit = JobManagerOptions.MAX_ATTEMPTS_HISTORY_SIZE.defaultValue();
    }

    // One execution vertex per subtask.
    for (int subtask = 0; subtask < subtaskCount; subtask++) {
      this.taskVertices[subtask] =
          new ExecutionVertex(
              this,
              subtask,
              this.producedDataSets,
              timeout,
              createTimestamp,
              attemptHistoryLimit);
    }

    // Sanity check: by now every produced result must report exactly one assigned partition
    // per subtask (presumably registered while the execution vertices were constructed).
    for (IntermediateResult produced : this.producedDataSets) {
      if (produced.getNumberOfAssignedPartitions() != parallelism) {
        throw new RuntimeException(
            "The intermediate result's partitions were not correctly assigned.");
      }
    }

    // Create the input splits, if the vertex has a split source. Any failure in here is
    // reported as a JobException.
    try {
      @SuppressWarnings("unchecked")
      InputSplitSource<InputSplit> source =
          (InputSplitSource<InputSplit>) jobVertex.getInputSplitSource();

      if (source != null) {
        // Run the split creation with the user class loader as the context class loader and
        // restore the previous one afterwards, whatever happens.
        final Thread caller = Thread.currentThread();
        final ClassLoader previousLoader = caller.getContextClassLoader();
        caller.setContextClassLoader(graph.getUserClassLoader());
        try {
          inputSplits = source.createInputSplits(subtaskCount);

          if (inputSplits != null) {
            splitAssigner = source.getInputSplitAssigner(inputSplits);
          }
        } finally {
          caller.setContextClassLoader(previousLoader);
        }
      } else {
        inputSplits = null;
      }
    } catch (Throwable t) {
      throw new JobException("Creating the input splits caused an error: " + t.getMessage(), t);
    }

    finishedSubtasks = new boolean[parallelism];
  }