Example #1
0
  /**
   * Computes the costs of a single operator and stores them on the given node.
   *
   * <p>The result is the sum of two parts:
   *
   * <ul>
   *   <li>for every input channel: the cost of its ship strategy, the cost of its local strategy
   *       and, if a temp mode other than {@code NONE} is set, the cost of an artificial dam;
   *   <li>the cost of the node's driver strategy, computed from the first two input channels.
   * </ul>
   *
   * <p>Channel costs and driver costs are each multiplied by their cost weight when the channel
   * (respectively the node) lies on a dynamic path.
   *
   * <p>All inputs must be set and must carry a ship strategy other than {@code NONE}.
   *
   * @param n The node to compute the costs for. The computed costs are handed to {@code
   *     n.setCosts(...)}.
   * @throws CompilerException If an input has ship strategy {@code NONE} or one that is not
   *     handled here, if an input has a local strategy other than {@code NONE}, {@code SORT} or
   *     {@code COMBININGSORT}, or if the driver strategy is not handled here.
   */
  public void costOperator(PlanNode n) {
    // start from empty costs and accumulate into them
    final Costs accumulated = new Costs();
    final long memoryBudget = n.getGuaranteedAvailableMemory();

    // ---------------------------------------------------------------------
    //  Part 1: per-input costs (shipping, local strategy, artificial dam)
    // ---------------------------------------------------------------------
    final Iterator<Channel> inputIterator = n.getInputs();
    while (inputIterator.hasNext()) {
      final Channel input = inputIterator.next();
      final Costs inputCosts = new Costs();

      // Background on FORWARD: plans that apply the same strategies at different positions are
      // equally expensive. For example, pushing a partitioning below a Map function often makes
      // no difference to the plan costs compared to partitioning after the Mapper. In such
      // cases the expensive strategy should appear later in the plan, because data reduction
      // often happens by large factors while blowup is rare and typically small. The intended
      // mechanism is a small heuristic penalty on the FORWARD strategy, weighted by the current
      // plan depth (steps to the earliest data source), so that later FORWARDs are more
      // expensive than earlier ones. That penalty is currently disabled: FORWARD adds no costs.
      switch (input.getShipStrategy()) {
        case NONE:
          throw new CompilerException(
              "Cannot determine costs: Shipping strategy has not been set for an input.");

        case FORWARD:
        case PARTITION_LOCAL_HASH:
          // no shipping costs are charged for these strategies
          break;

        case PARTITION_RANDOM:
          addRandomPartitioningCost(input, inputCosts);
          break;

        case PARTITION_HASH:
          addHashPartitioningCost(input, inputCosts);
          break;

        case PARTITION_RANGE:
          addRangePartitionCost(input, inputCosts);
          break;

        case BROADCAST:
          addBroadcastCost(input, input.getReplicationFactor(), inputCosts);
          break;

        default:
          throw new CompilerException(
              "Unknown shipping strategy for input: " + input.getShipStrategy());
      }

      switch (input.getLocalStrategy()) {
        case SORT:
        case COMBININGSORT:
          // both sorting variants are charged as a local sort
          addLocalSortCost(input, memoryBudget, inputCosts);
          break;

        case NONE:
          break;

        default:
          throw new CompilerException(
              "Unsupported local strategy for input: " + input.getLocalStrategy());
      }

      // a temp mode that is set and is not NONE is charged as an artificial dam
      if (!(input.getTempMode() == null || input.getTempMode() == TempMode.NONE)) {
        addArtificialDamCost(input, 0, inputCosts);
      }

      // inputs on a dynamic path are scaled by their cost weight
      if (input.isOnDynamicPath()) {
        inputCosts.multiplyWith(input.getCostWeight());
      }

      accumulated.addCosts(inputCosts);
    }

    // ---------------------------------------------------------------------
    //  Part 2: costs of the driver strategy
    // ---------------------------------------------------------------------
    final Costs localCosts = new Costs();

    // pick up to two inputs; an input that does not exist stays null
    final Iterator<Channel> driverInputs = n.getInputs();
    final Channel firstInput = driverInputs.hasNext() ? driverInputs.next() : null;
    final Channel secondInput = driverInputs.hasNext() ? driverInputs.next() : null;

    switch (n.getDriverStrategy()) {
      // --- strategies that are not charged any driver costs ---
      case NONE:
      case UNARY_NO_OP:
      case BINARY_NO_OP:
      case COLLECTOR_MAP:
      case MAP:
      case FLAT_MAP:
      case ALL_GROUP: // no actual grouping: every element is in the same single group
      case CO_GROUP: // co-grouping over sorted streams is for free
      case SORTED_GROUP: // grouping over sorted streams is for free
      case PARTIAL_GROUP: // always local and main memory resident; a relative cpu cost
        // should be added at some point
      case UNION: // pipelined local union is for free
        break;

      case MERGE:
        addLocalMergeCost(firstInput, secondInput, memoryBudget, localCosts);
        break;

      // --- hybrid hash: the build side is passed first ---
      case HYBRIDHASH_BUILD_FIRST:
        addHybridHashCosts(firstInput, secondInput, memoryBudget, localCosts);
        break;
      case HYBRIDHASH_BUILD_SECOND:
        addHybridHashCosts(secondInput, firstInput, memoryBudget, localCosts);
        break;

      // --- nested loops: the outer side is passed first ---
      case NESTEDLOOP_BLOCKED_OUTER_FIRST:
        addBlockNestedLoopsCosts(firstInput, secondInput, memoryBudget, localCosts);
        break;
      case NESTEDLOOP_BLOCKED_OUTER_SECOND:
        addBlockNestedLoopsCosts(secondInput, firstInput, memoryBudget, localCosts);
        break;
      case NESTEDLOOP_STREAMED_OUTER_FIRST:
        addStreamedNestedLoopsCosts(firstInput, secondInput, memoryBudget, localCosts);
        break;
      case NESTEDLOOP_STREAMED_OUTER_SECOND:
        addStreamedNestedLoopsCosts(secondInput, firstInput, memoryBudget, localCosts);
        break;

      // GROUP_SELF_NESTEDLOOP has no cost model here and is rejected like any unlisted strategy
      case GROUP_SELF_NESTEDLOOP:
      default:
        throw new CompilerException("Unknown local strategy: " + n.getDriverStrategy().name());
    }

    // a node on a dynamic path has its driver costs scaled by its cost weight
    if (n.isOnDynamicPath()) {
      localCosts.multiplyWith(n.getCostWeight());
    }

    accumulated.addCosts(localCosts);
    n.setCosts(accumulated);
  }