/** * This method computes the cost of an operator. The cost is composed of cost for input shipping, * locally processing an input, and running the operator. * * <p>It requires at least that all inputs are set and have a proper ship strategy set, which is * not equal to <tt>NONE</tt>. * * @param n The node to compute the costs for. */ public void costOperator(PlanNode n) { // initialize costs objects with no costs final Costs totalCosts = new Costs(); final long availableMemory = n.getGuaranteedAvailableMemory(); // add the shipping strategy costs for (Iterator<Channel> channels = n.getInputs(); channels.hasNext(); ) { final Channel channel = channels.next(); final Costs costs = new Costs(); // Plans that apply the same strategies, but at different points // are equally expensive. For example, if a partitioning can be // pushed below a Map function there is often no difference in plan // costs between the pushed down version and the version that partitions // after the Mapper. However, in those cases, we want the expensive // strategy to appear later in the plan, as data reduction often occurs // by large factors, while blowup is rare and typically by smaller fractions. // We achieve this by adding a penalty to small penalty to the FORWARD strategy, // weighted by the current plan depth (steps to the earliest data source). // that way, later FORWARDS are more expensive than earlier forwards. // Note that this only applies to the heuristic costs. switch (channel.getShipStrategy()) { case NONE: throw new CompilerException( "Cannot determine costs: Shipping strategy has not been set for an input."); case FORWARD: // costs.addHeuristicNetworkCost(channel.getMaxDepth()); break; case PARTITION_LOCAL_HASH: break; case PARTITION_RANDOM: addRandomPartitioningCost(channel, costs); break; case PARTITION_HASH: addHashPartitioningCost(channel, costs); break; case PARTITION_RANGE: addRangePartitionCost(channel, costs); break; case BROADCAST: addBroadcastCost(channel, channel.getReplicationFactor(), costs); break; default: throw new CompilerException( "Unknown shipping strategy for input: " + channel.getShipStrategy()); } switch (channel.getLocalStrategy()) { case NONE: break; case SORT: case COMBININGSORT: addLocalSortCost(channel, availableMemory, costs); break; default: throw new CompilerException( "Unsupported local strategy for input: " + channel.getLocalStrategy()); } if (channel.getTempMode() != null && channel.getTempMode() != TempMode.NONE) { addArtificialDamCost(channel, 0, costs); } // adjust with the cost weight factor if (channel.isOnDynamicPath()) { costs.multiplyWith(channel.getCostWeight()); } totalCosts.addCosts(costs); } Channel firstInput = null; Channel secondInput = null; Costs driverCosts = new Costs(); // get the inputs, if we have some { Iterator<Channel> channels = n.getInputs(); if (channels.hasNext()) firstInput = channels.next(); if (channels.hasNext()) secondInput = channels.next(); } // determine the local costs switch (n.getDriverStrategy()) { case NONE: case UNARY_NO_OP: case BINARY_NO_OP: case COLLECTOR_MAP: case MAP: case FLAT_MAP: case ALL_GROUP: // this operation does not do any actual grouping, since every element is in the same single // group case CO_GROUP: case SORTED_GROUP: // grouping or co-grouping over sorted streams for free case PARTIAL_GROUP: // partial grouping is always local and main memory resident. we should add a relative cpu // cost at some point case UNION: // pipelined local union is for free break; case MERGE: addLocalMergeCost(firstInput, secondInput, availableMemory, driverCosts); break; case HYBRIDHASH_BUILD_FIRST: addHybridHashCosts(firstInput, secondInput, availableMemory, driverCosts); break; case HYBRIDHASH_BUILD_SECOND: addHybridHashCosts(secondInput, firstInput, availableMemory, driverCosts); break; case NESTEDLOOP_BLOCKED_OUTER_FIRST: addBlockNestedLoopsCosts(firstInput, secondInput, availableMemory, driverCosts); break; case NESTEDLOOP_BLOCKED_OUTER_SECOND: addBlockNestedLoopsCosts(secondInput, firstInput, availableMemory, driverCosts); break; case NESTEDLOOP_STREAMED_OUTER_FIRST: addStreamedNestedLoopsCosts(firstInput, secondInput, availableMemory, driverCosts); break; case NESTEDLOOP_STREAMED_OUTER_SECOND: addStreamedNestedLoopsCosts(secondInput, firstInput, availableMemory, driverCosts); break; case GROUP_SELF_NESTEDLOOP: default: throw new CompilerException("Unknown local strategy: " + n.getDriverStrategy().name()); } // adjust with the cost weight factor if (n.isOnDynamicPath()) { driverCosts.multiplyWith(n.getCostWeight()); } totalCosts.addCosts(driverCosts); n.setCosts(totalCosts); }