@Override
  protected void instantiateCandidate(
      OperatorDescriptorSingle dps,
      Channel in,
      List<Set<? extends NamedChannel>> broadcastPlanChannels,
      List<PlanNode> target,
      CostEstimator estimator,
      RequestedGlobalProperties globPropsReq,
      RequestedLocalProperties locPropsReq) {
    // Builds the bulk iteration plan candidates for the given input channel and appends them to
    // 'target'. The parameters 'dps' and 'broadcastPlanChannels' are not read by this method.
    //
    // How the step function plans are enumerated:
    // Each time the iteration is instantiated, the step function candidates are enumerated anew.
    // This ensures that every candidate for the initial partial solution gets a matching step
    // function candidate (often, work is pushed out of the step function). Of the step function
    // candidates, only those that meet the properties requested from the current initial partial
    // solution candidate are kept, so that these properties also hold at the start of the
    // following iteration.

    // Step 1: the enumeration runs repeatedly, so plans cached by an earlier run are cleaned
    // before enumerating again.
    this.nextPartialSolution.accept(PlanCacheCleaner.INSTANCE);

    // Step 2: hand the properties of the current initial-partial-solution candidate to the
    // partial solution node.
    this.partialSolution.setCandidateProperties(in.getGlobalProperties(), in.getLocalProperties());
    final BulkPartialSolutionPlanNode pspn =
        this.partialSolution.getCurrentPartialSolutionPlanNode();

    // Step 3: enumerate the alternative plans of the step function.
    final List<PlanNode> stepPlans = this.nextPartialSolution.getAlternativePlans(estimator);

    // Step 4: drop every plan that fails the global or the local properties requested from the
    // initial partial solution.
    final Iterator<PlanNode> pruner = stepPlans.iterator();
    while (pruner.hasNext()) {
      final PlanNode plan = pruner.next();
      if (!globPropsReq.isMetBy(plan.getGlobalProperties())
          || !locPropsReq.isMetBy(plan.getLocalProperties())) {
        pruner.remove();
      }
    }

    // Without any surviving step function plan there is nothing to instantiate.
    if (stepPlans.isEmpty()) {
      return;
    }

    // Step 5: create one iteration node candidate per remaining step function plan.
    if (this.terminationCriterion == null) {
      for (PlanNode plan : stepPlans) {
        final BulkIterationPlanNode iterationNode =
            new BulkIterationPlanNode(
                this, "BulkIteration (" + this.getPactContract().getName() + ")", in, pspn, plan);
        iterationNode.initProperties(
            plan.getGlobalProperties().clone(), plan.getLocalProperties().clone());
        target.add(iterationNode);
      }
      return;
    }

    // With a termination criterion, every step function plan is paired with every
    // branch-compatible plan of the termination criterion.
    final List<PlanNode> terminationPlans =
        this.terminationCriterion.getAlternativePlans(estimator);

    for (PlanNode plan : stepPlans) {
      for (PlanNode terminationPlan : terminationPlans) {
        if (!this.singleRoot.areBranchCompatible(plan, terminationPlan)) {
          continue;
        }

        final BulkIterationPlanNode iterationNode =
            new BulkIterationPlanNode(
                this,
                "BulkIteration (" + this.getPactContract().getName() + ")",
                in,
                pspn,
                plan,
                terminationPlan);
        iterationNode.initProperties(
            plan.getGlobalProperties().clone(), plan.getLocalProperties().clone());
        target.add(iterationNode);
      }
    }
  }
示例#2
0
  /**
   * Computes the costs of a plan node and stores them on the node through {@code n.setCosts(...)}.
   *
   * <p>The total is the sum of the per-input costs (shipping strategy, local strategy on the
   * channel, and an artificial dam if the channel has a temp mode other than {@code NONE}) and the
   * costs of the node's driver strategy. Costs of inputs or nodes on the dynamic path are
   * multiplied with their cost weight before being added.
   *
   * <p>Every input must have a ship strategy other than <tt>NONE</tt>; otherwise a {@code
   * CompilerException} is thrown. The same exception is thrown for ship, local, or driver
   * strategies that this method does not handle.
   *
   * @param n The node to compute the costs for.
   */
  public void costOperator(PlanNode n) {
    // start with zero costs and accumulate into this object
    final Costs totalCosts = new Costs();
    final long availableMemory = n.getGuaranteedAvailableMemory();

    // ---------------------------------------------------------------------------------------
    // Part 1: costs caused by the individual inputs
    // ---------------------------------------------------------------------------------------
    final Iterator<Channel> inputs = n.getInputs();
    while (inputs.hasNext()) {
      final Channel input = inputs.next();
      final Costs inputCosts = new Costs();

      // Background on FORWARD: plans that use the same strategies at different positions cost the
      // same. For instance, partitioning before or after a Map function frequently makes no
      // difference to the plan costs. In such ties the expensive strategy should come later in
      // the plan, because data reduction tends to be large while blow-up is rare and small. The
      // idea is a small heuristic penalty on FORWARD, weighted by the plan depth (steps to the
      // earliest data source), so that later forwards cost more than earlier ones. That penalty
      // (costs.addHeuristicNetworkCost(channel.getMaxDepth())) is currently disabled, so FORWARD
      // adds nothing here.
      switch (input.getShipStrategy()) {
        case FORWARD:
        case PARTITION_LOCAL_HASH:
          // no shipping costs are added for these strategies
          break;
        case PARTITION_RANDOM:
          addRandomPartitioningCost(input, inputCosts);
          break;
        case PARTITION_HASH:
          addHashPartitioningCost(input, inputCosts);
          break;
        case PARTITION_RANGE:
          addRangePartitionCost(input, inputCosts);
          break;
        case BROADCAST:
          addBroadcastCost(input, input.getReplicationFactor(), inputCosts);
          break;
        case NONE:
          throw new CompilerException(
              "Cannot determine costs: Shipping strategy has not been set for an input.");
        default:
          throw new CompilerException(
              "Unknown shipping strategy for input: " + input.getShipStrategy());
      }

      // local strategy applied on the channel: only sorting variants add costs
      switch (input.getLocalStrategy()) {
        case SORT:
        case COMBININGSORT:
          addLocalSortCost(input, availableMemory, inputCosts);
          break;
        case NONE:
          break;
        default:
          throw new CompilerException(
              "Unsupported local strategy for input: " + input.getLocalStrategy());
      }

      // a channel with a temp mode other than NONE is charged an artificial dam
      final TempMode tempMode = input.getTempMode();
      if (tempMode != null && tempMode != TempMode.NONE) {
        addArtificialDamCost(input, 0, inputCosts);
      }

      // inputs on the dynamic path are scaled by their cost weight
      if (input.isOnDynamicPath()) {
        inputCosts.multiplyWith(input.getCostWeight());
      }

      totalCosts.addCosts(inputCosts);
    }

    // ---------------------------------------------------------------------------------------
    // Part 2: costs of the driver (the operator's own local processing)
    // ---------------------------------------------------------------------------------------
    final Costs driverCosts = new Costs();

    // pick up to two inputs; either stays null if the node has fewer inputs
    final Iterator<Channel> leadingInputs = n.getInputs();
    final Channel firstInput = leadingInputs.hasNext() ? leadingInputs.next() : null;
    final Channel secondInput = leadingInputs.hasNext() ? leadingInputs.next() : null;

    switch (n.getDriverStrategy()) {
      case MERGE:
        addLocalMergeCost(firstInput, secondInput, availableMemory, driverCosts);
        break;
      case HYBRIDHASH_BUILD_FIRST:
        addHybridHashCosts(firstInput, secondInput, availableMemory, driverCosts);
        break;
      case HYBRIDHASH_BUILD_SECOND:
        // argument order is swapped relative to BUILD_FIRST
        addHybridHashCosts(secondInput, firstInput, availableMemory, driverCosts);
        break;
      case NESTEDLOOP_BLOCKED_OUTER_FIRST:
        addBlockNestedLoopsCosts(firstInput, secondInput, availableMemory, driverCosts);
        break;
      case NESTEDLOOP_BLOCKED_OUTER_SECOND:
        addBlockNestedLoopsCosts(secondInput, firstInput, availableMemory, driverCosts);
        break;
      case NESTEDLOOP_STREAMED_OUTER_FIRST:
        addStreamedNestedLoopsCosts(firstInput, secondInput, availableMemory, driverCosts);
        break;
      case NESTEDLOOP_STREAMED_OUTER_SECOND:
        addStreamedNestedLoopsCosts(secondInput, firstInput, availableMemory, driverCosts);
        break;

      // Strategies that currently add no driver costs:
      //  - no-ops and (flat) map variants
      //  - ALL_GROUP: no actual grouping happens, every element is in the same single group
      //  - CO_GROUP / SORTED_GROUP: (co-)grouping over sorted streams is treated as free
      //  - PARTIAL_GROUP: always local and main memory resident; a relative CPU cost should be
      //    added at some point
      //  - UNION: a pipelined local union is treated as free
      case NONE:
      case UNARY_NO_OP:
      case BINARY_NO_OP:
      case COLLECTOR_MAP:
      case MAP:
      case FLAT_MAP:
      case ALL_GROUP:
      case CO_GROUP:
      case SORTED_GROUP:
      case PARTIAL_GROUP:
      case UNION:
        break;

      // GROUP_SELF_NESTEDLOOP has no cost function here and is rejected like an unknown strategy
      case GROUP_SELF_NESTEDLOOP:
      default:
        throw new CompilerException("Unknown local strategy: " + n.getDriverStrategy().name());
    }

    // nodes on the dynamic path are scaled by their cost weight
    if (n.isOnDynamicPath()) {
      driverCosts.multiplyWith(n.getCostWeight());
    }

    totalCosts.addCosts(driverCosts);
    n.setCosts(totalCosts);
  }
示例#3
0
 /**
  * Returns the string representation of {@code original} by delegating to its {@code toString()}.
  *
  * @return The result of {@code original.toString()}.
  */
 @Override
 public String toString() {
   return original.toString();
 }
  /**
   * Creates the workset iteration plan candidates for one combination of solution set input and
   * workset input and appends them to {@code target}.
   *
   * <p>The step function (next workset and solution set delta) is enumerated anew on every call.
   * Workset candidates that do not meet the properties requested for the workset are discarded;
   * every branch-compatible pair of solution set delta plan and workset plan then yields one
   * {@code WorksetIterationPlanNode}.
   *
   * @param operator The operator descriptor; not read by this method.
   * @param solutionSetIn The channel providing the initial solution set.
   * @param worksetIn The channel providing the initial workset.
   * @param broadcastPlanChannels The broadcast channels; not read by this method.
   * @param target The list that receives the created plan candidates.
   * @param estimator The cost estimator used while enumerating the step function plans.
   * @param globPropsReqSolutionSet Requested global properties of the solution set; not read by
   *     this method.
   * @param globPropsReqWorkset Requested global properties that workset candidates must meet.
   * @param locPropsReqSolutionSet Requested local properties of the solution set; not read by this
   *     method.
   * @param locPropsReqWorkset Requested local properties that workset candidates must meet.
   * @throws CompilerException If a solution set delta candidate is not hash partitioned on the
   *     solution set key fields, or if a delta node that is to be skipped is not a plain no-op
   *     with no local strategy on its input.
   */
  @Override
  protected void instantiate(
      OperatorDescriptorDual operator,
      Channel solutionSetIn,
      Channel worksetIn,
      List<Set<? extends NamedChannel>> broadcastPlanChannels,
      List<PlanNode> target,
      CostEstimator estimator,
      RequestedGlobalProperties globPropsReqSolutionSet,
      RequestedGlobalProperties globPropsReqWorkset,
      RequestedLocalProperties locPropsReqSolutionSet,
      RequestedLocalProperties locPropsReqWorkset) {
    // check for pipeline breaking using hash join with build on the solution set side
    placePipelineBreakersIfNecessary(
        DriverStrategy.HYBRIDHASH_BUILD_FIRST, solutionSetIn, worksetIn);

    // NOTES ON THE ENUMERATION OF THE STEP FUNCTION PLANS:
    // Whenever we instantiate the iteration, we enumerate new candidates for the step function.
    // That way, we make sure that for each candidate for the initial partial solution we have a
    // fitting candidate for the step function (often, work is pushed out of the step function).
    // Among the candidates of the step function, we keep only those that meet the requested
    // properties of the current candidate initial partial solution. That makes sure these
    // properties exist at the beginning of every iteration.

    // 1) Because we enumerate multiple times, we may need to clean the cached plans
    //    before starting another enumeration
    this.nextWorkset.accept(PlanCacheCleaner.INSTANCE);
    this.solutionSetDelta.accept(PlanCacheCleaner.INSTANCE);

    // 2) Give the partial solution the properties of the current candidate for the initial
    //    partial solution. Only the workset takes its properties from the input; the solution
    //    set node always gets the partitioned properties and empty local properties.
    this.worksetNode.setCandidateProperties(
        worksetIn.getGlobalProperties(), worksetIn.getLocalProperties());
    this.solutionSetNode.setCandidateProperties(this.partitionedProperties, new LocalProperties());

    final SolutionSetPlanNode sspn = this.solutionSetNode.getCurrentSolutionSetPlanNode();
    final WorksetPlanNode wspn = this.worksetNode.getCurrentWorksetPlanNode();

    // 3) Get the alternative plans
    List<PlanNode> solutionSetDeltaCandidates =
        this.solutionSetDelta.getAlternativePlans(estimator);
    List<PlanNode> worksetCandidates = this.nextWorkset.getAlternativePlans(estimator);

    // 4) Throw away all that are not compatible with the properties currently requested to the
    //    initial partial solution

    // Make sure that the workset candidates fulfill the input requirements
    for (Iterator<PlanNode> planDeleter = worksetCandidates.iterator(); planDeleter.hasNext(); ) {
      PlanNode candidate = planDeleter.next();
      if (!(globPropsReqWorkset.isMetBy(candidate.getGlobalProperties())
          && locPropsReqWorkset.isMetBy(candidate.getLocalProperties()))) {
        planDeleter.remove();
      }
    }
    if (worksetCandidates.isEmpty()) {
      return;
    }

    // sanity check the solution set delta: every candidate must be hash partitioned on exactly
    // the solution set key fields
    for (Iterator<PlanNode> deltaPlans = solutionSetDeltaCandidates.iterator();
        deltaPlans.hasNext(); ) {
      SingleInputPlanNode candidate = (SingleInputPlanNode) deltaPlans.next();
      GlobalProperties gp = candidate.getGlobalProperties();

      if (gp.getPartitioning() != PartitioningProperty.HASH_PARTITIONED
          || gp.getPartitioningFields() == null
          || !gp.getPartitioningFields().equals(this.solutionSetKeyFields)) {
        throw new CompilerException("Bug: The solution set delta is not partitioned.");
      }
    }

    // 5) Create a candidate for the Iteration Node for every remaining plan of the step function.

    // properties of the iteration result: hash partitioned on, and unique in, the key fields
    final GlobalProperties gp = new GlobalProperties();
    gp.setHashPartitioned(this.solutionSetKeyFields);
    gp.addUniqueFieldCombination(this.solutionSetKeyFields);

    final LocalProperties lp = new LocalProperties();
    lp.addUniqueFields(this.solutionSetKeyFields);

    // take all combinations of solution set delta and workset plans
    for (PlanNode solutionSetCandidate : solutionSetDeltaCandidates) {
      for (PlanNode worksetCandidate : worksetCandidates) {
        // check whether they have the same operator at their latest branching point
        if (this.singleRoot.areBranchCompatible(solutionSetCandidate, worksetCandidate)) {

          SingleInputPlanNode siSolutionDeltaCandidate = (SingleInputPlanNode) solutionSetCandidate;

          // The node that acts as the solution set delta for THIS combination. It is kept in its
          // own local on purpose: assigning the predecessor to the outer loop variable would
          // carry over to the remaining workset candidates, which would then be checked against
          // (and cast from) the predecessor instead of the delta node.
          final PlanNode effectiveDelta;
          final boolean immediateDeltaUpdate;

          // check whether we need a dedicated solution set delta operator, or whether we can update
          // on the fly
          if (siSolutionDeltaCandidate.getInput().getShipStrategy() == ShipStrategyType.FORWARD
              && this.solutionDeltaImmediatelyAfterSolutionJoin) {
            // we do not need this extra node. we can make the predecessor the delta
            // sanity check the node and connection
            if (siSolutionDeltaCandidate.getDriverStrategy() != DriverStrategy.UNARY_NO_OP
                || siSolutionDeltaCandidate.getInput().getLocalStrategy() != LocalStrategy.NONE) {
              throw new CompilerException("Invalid Solution set delta node.");
            }

            effectiveDelta = siSolutionDeltaCandidate.getInput().getSource();
            immediateDeltaUpdate = true;
          } else {
            // the delta node cannot be skipped, so we keep it
            // and mark that we materialize its input
            siSolutionDeltaCandidate.getInput().setTempMode(TempMode.PIPELINE_BREAKER);
            effectiveDelta = solutionSetCandidate;
            immediateDeltaUpdate = false;
          }

          WorksetIterationPlanNode wsNode =
              new WorksetIterationPlanNode(
                  this,
                  "WorksetIteration (" + this.getPactContract().getName() + ")",
                  solutionSetIn,
                  worksetIn,
                  sspn,
                  wspn,
                  worksetCandidate,
                  effectiveDelta);
          wsNode.setImmediateSolutionSetUpdate(immediateDeltaUpdate);
          wsNode.initProperties(gp, lp);
          target.add(wsNode);
        }
      }
    }
  }