/**
 * Creates the iteration plan candidates for one candidate input channel of the initial partial
 * solution and appends them to {@code target}.
 *
 * <p>The step function is enumerated anew on every call. Only those step function plans whose
 * properties meet the requested properties are kept, so that the properties of the initial
 * partial solution also hold at the start of each following iteration.
 *
 * @param dps the operator descriptor (not used by this implementation)
 * @param in the candidate channel delivering the initial partial solution
 * @param broadcastPlanChannels the broadcast channels (not used by this implementation)
 * @param target the list that receives the created iteration plan nodes
 * @param estimator the cost estimator handed to the plan enumeration of the step function
 * @param globPropsReq the global properties the step function result must meet
 * @param locPropsReq the local properties the step function result must meet
 */
@Override
protected void instantiateCandidate(
        OperatorDescriptorSingle dps,
        Channel in,
        List<Set<? extends NamedChannel>> broadcastPlanChannels,
        List<PlanNode> target,
        CostEstimator estimator,
        RequestedGlobalProperties globPropsReq,
        RequestedLocalProperties locPropsReq) {

    // Step 1: this method runs once per input candidate, so plans cached by an earlier
    // enumeration of the step function have to be dropped first.
    this.nextPartialSolution.accept(PlanCacheCleaner.INSTANCE);

    // Step 2: the partial solution inside the step function takes on the properties of the
    // current candidate for the initial partial solution.
    this.partialSolution.setCandidateProperties(
            in.getGlobalProperties(), in.getLocalProperties());
    final BulkPartialSolutionPlanNode partialSolutionPlan =
            this.partialSolution.getCurrentPartialSolutionPlanNode();

    // Step 3: enumerate the alternative plans of the step function.
    final List<PlanNode> stepPlans = this.nextPartialSolution.getAlternativePlans(estimator);

    // Step 4: discard every step function plan that misses the requested global or local
    // properties.
    final Iterator<PlanNode> stepPlanIter = stepPlans.iterator();
    while (stepPlanIter.hasNext()) {
        final PlanNode stepPlan = stepPlanIter.next();
        if (!globPropsReq.isMetBy(stepPlan.getGlobalProperties())
                || !locPropsReq.isMetBy(stepPlan.getLocalProperties())) {
            stepPlanIter.remove();
        }
    }

    // Step 5: build one iteration candidate per surviving step function plan.
    if (terminationCriterion == null) {
        for (PlanNode stepPlan : stepPlans) {
            final String nodeName = "BulkIteration (" + this.getPactContract().getName() + ")";
            final BulkIterationPlanNode iteration =
                    new BulkIterationPlanNode(this, nodeName, in, partialSolutionPlan, stepPlan);
            iteration.initProperties(
                    stepPlan.getGlobalProperties().clone(),
                    stepPlan.getLocalProperties().clone());
            target.add(iteration);
        }
        return;
    }

    // With a termination criterion, its plans are only enumerated when at least one step
    // function plan is left to be paired with them.
    if (stepPlans.isEmpty()) {
        return;
    }

    final List<PlanNode> terminationPlans =
            this.terminationCriterion.getAlternativePlans(estimator);

    for (PlanNode stepPlan : stepPlans) {
        for (PlanNode terminationPlan : terminationPlans) {
            // only pairs that are compatible with respect to their branches form a candidate
            if (!this.singleRoot.areBranchCompatible(stepPlan, terminationPlan)) {
                continue;
            }
            final String nodeName = "BulkIteration (" + this.getPactContract().getName() + ")";
            final BulkIterationPlanNode iteration =
                    new BulkIterationPlanNode(
                            this, nodeName, in, partialSolutionPlan, stepPlan, terminationPlan);
            iteration.initProperties(
                    stepPlan.getGlobalProperties().clone(),
                    stepPlan.getLocalProperties().clone());
            target.add(iteration);
        }
    }
}
/** * This method computes the cost of an operator. The cost is composed of cost for input shipping, * locally processing an input, and running the operator. * * <p>It requires at least that all inputs are set and have a proper ship strategy set, which is * not equal to <tt>NONE</tt>. * * @param n The node to compute the costs for. */ public void costOperator(PlanNode n) { // initialize costs objects with no costs final Costs totalCosts = new Costs(); final long availableMemory = n.getGuaranteedAvailableMemory(); // add the shipping strategy costs for (Iterator<Channel> channels = n.getInputs(); channels.hasNext(); ) { final Channel channel = channels.next(); final Costs costs = new Costs(); // Plans that apply the same strategies, but at different points // are equally expensive. For example, if a partitioning can be // pushed below a Map function there is often no difference in plan // costs between the pushed down version and the version that partitions // after the Mapper. However, in those cases, we want the expensive // strategy to appear later in the plan, as data reduction often occurs // by large factors, while blowup is rare and typically by smaller fractions. // We achieve this by adding a penalty to small penalty to the FORWARD strategy, // weighted by the current plan depth (steps to the earliest data source). // that way, later FORWARDS are more expensive than earlier forwards. // Note that this only applies to the heuristic costs. 
switch (channel.getShipStrategy()) { case NONE: throw new CompilerException( "Cannot determine costs: Shipping strategy has not been set for an input."); case FORWARD: // costs.addHeuristicNetworkCost(channel.getMaxDepth()); break; case PARTITION_LOCAL_HASH: break; case PARTITION_RANDOM: addRandomPartitioningCost(channel, costs); break; case PARTITION_HASH: addHashPartitioningCost(channel, costs); break; case PARTITION_RANGE: addRangePartitionCost(channel, costs); break; case BROADCAST: addBroadcastCost(channel, channel.getReplicationFactor(), costs); break; default: throw new CompilerException( "Unknown shipping strategy for input: " + channel.getShipStrategy()); } switch (channel.getLocalStrategy()) { case NONE: break; case SORT: case COMBININGSORT: addLocalSortCost(channel, availableMemory, costs); break; default: throw new CompilerException( "Unsupported local strategy for input: " + channel.getLocalStrategy()); } if (channel.getTempMode() != null && channel.getTempMode() != TempMode.NONE) { addArtificialDamCost(channel, 0, costs); } // adjust with the cost weight factor if (channel.isOnDynamicPath()) { costs.multiplyWith(channel.getCostWeight()); } totalCosts.addCosts(costs); } Channel firstInput = null; Channel secondInput = null; Costs driverCosts = new Costs(); // get the inputs, if we have some { Iterator<Channel> channels = n.getInputs(); if (channels.hasNext()) firstInput = channels.next(); if (channels.hasNext()) secondInput = channels.next(); } // determine the local costs switch (n.getDriverStrategy()) { case NONE: case UNARY_NO_OP: case BINARY_NO_OP: case COLLECTOR_MAP: case MAP: case FLAT_MAP: case ALL_GROUP: // this operation does not do any actual grouping, since every element is in the same single // group case CO_GROUP: case SORTED_GROUP: // grouping or co-grouping over sorted streams for free case PARTIAL_GROUP: // partial grouping is always local and main memory resident. 
we should add a relative cpu // cost at some point case UNION: // pipelined local union is for free break; case MERGE: addLocalMergeCost(firstInput, secondInput, availableMemory, driverCosts); break; case HYBRIDHASH_BUILD_FIRST: addHybridHashCosts(firstInput, secondInput, availableMemory, driverCosts); break; case HYBRIDHASH_BUILD_SECOND: addHybridHashCosts(secondInput, firstInput, availableMemory, driverCosts); break; case NESTEDLOOP_BLOCKED_OUTER_FIRST: addBlockNestedLoopsCosts(firstInput, secondInput, availableMemory, driverCosts); break; case NESTEDLOOP_BLOCKED_OUTER_SECOND: addBlockNestedLoopsCosts(secondInput, firstInput, availableMemory, driverCosts); break; case NESTEDLOOP_STREAMED_OUTER_FIRST: addStreamedNestedLoopsCosts(firstInput, secondInput, availableMemory, driverCosts); break; case NESTEDLOOP_STREAMED_OUTER_SECOND: addStreamedNestedLoopsCosts(secondInput, firstInput, availableMemory, driverCosts); break; case GROUP_SELF_NESTEDLOOP: default: throw new CompilerException("Unknown local strategy: " + n.getDriverStrategy().name()); } // adjust with the cost weight factor if (n.isOnDynamicPath()) { driverCosts.multiplyWith(n.getCostWeight()); } totalCosts.addCosts(driverCosts); n.setCosts(totalCosts); }
/**
 * Returns the string representation of the wrapped {@code original} object.
 *
 * @return the result of calling {@code toString()} on {@code original}
 */
@Override
public String toString() {
    return original.toString();
}
@Override protected void instantiate( OperatorDescriptorDual operator, Channel solutionSetIn, Channel worksetIn, List<Set<? extends NamedChannel>> broadcastPlanChannels, List<PlanNode> target, CostEstimator estimator, RequestedGlobalProperties globPropsReqSolutionSet, RequestedGlobalProperties globPropsReqWorkset, RequestedLocalProperties locPropsReqSolutionSet, RequestedLocalProperties locPropsReqWorkset) { // check for pipeline breaking using hash join with build on the solution set side placePipelineBreakersIfNecessary( DriverStrategy.HYBRIDHASH_BUILD_FIRST, solutionSetIn, worksetIn); // NOTES ON THE ENUMERATION OF THE STEP FUNCTION PLANS: // Whenever we instantiate the iteration, we enumerate new candidates for the step function. // That way, we make sure we have an appropriate plan for each candidate for the initial partial // solution, // we have a fitting candidate for the step function (often, work is pushed out of the step // function). // Among the candidates of the step function, we keep only those that meet the requested // properties of the // current candidate initial partial solution. That makes sure these properties exist at the // beginning of // every iteration. // 1) Because we enumerate multiple times, we may need to clean the cached plans // before starting another enumeration this.nextWorkset.accept(PlanCacheCleaner.INSTANCE); this.solutionSetDelta.accept(PlanCacheCleaner.INSTANCE); // 2) Give the partial solution the properties of the current candidate for the initial partial // solution // This concerns currently only the workset. 
this.worksetNode.setCandidateProperties( worksetIn.getGlobalProperties(), worksetIn.getLocalProperties()); this.solutionSetNode.setCandidateProperties(this.partitionedProperties, new LocalProperties()); final SolutionSetPlanNode sspn = this.solutionSetNode.getCurrentSolutionSetPlanNode(); final WorksetPlanNode wspn = this.worksetNode.getCurrentWorksetPlanNode(); // 3) Get the alternative plans List<PlanNode> solutionSetDeltaCandidates = this.solutionSetDelta.getAlternativePlans(estimator); List<PlanNode> worksetCandidates = this.nextWorkset.getAlternativePlans(estimator); // 4) Throw away all that are not compatible with the properties currently requested to the // initial partial solution // Make sure that the workset candidates fulfill the input requirements for (Iterator<PlanNode> planDeleter = worksetCandidates.iterator(); planDeleter.hasNext(); ) { PlanNode candidate = planDeleter.next(); if (!(globPropsReqWorkset.isMetBy(candidate.getGlobalProperties()) && locPropsReqWorkset.isMetBy(candidate.getLocalProperties()))) { planDeleter.remove(); } } if (worksetCandidates.isEmpty()) { return; } // sanity check the solution set delta and cancel out the delta node, if it is not needed for (Iterator<PlanNode> deltaPlans = solutionSetDeltaCandidates.iterator(); deltaPlans.hasNext(); ) { SingleInputPlanNode candidate = (SingleInputPlanNode) deltaPlans.next(); GlobalProperties gp = candidate.getGlobalProperties(); if (gp.getPartitioning() != PartitioningProperty.HASH_PARTITIONED || gp.getPartitioningFields() == null || !gp.getPartitioningFields().equals(this.solutionSetKeyFields)) { throw new CompilerException("Bug: The solution set delta is not partitioned."); } } // 5) Create a candidate for the Iteration Node for every remaining plan of the step function. 
final GlobalProperties gp = new GlobalProperties(); gp.setHashPartitioned(this.solutionSetKeyFields); gp.addUniqueFieldCombination(this.solutionSetKeyFields); final LocalProperties lp = new LocalProperties(); lp.addUniqueFields(this.solutionSetKeyFields); // take all combinations of solution set delta and workset plans for (PlanNode solutionSetCandidate : solutionSetDeltaCandidates) { for (PlanNode worksetCandidate : worksetCandidates) { // check whether they have the same operator at their latest branching point if (this.singleRoot.areBranchCompatible(solutionSetCandidate, worksetCandidate)) { SingleInputPlanNode siSolutionDeltaCandidate = (SingleInputPlanNode) solutionSetCandidate; boolean immediateDeltaUpdate; // check whether we need a dedicated solution set delta operator, or whether we can update // on the fly if (siSolutionDeltaCandidate.getInput().getShipStrategy() == ShipStrategyType.FORWARD && this.solutionDeltaImmediatelyAfterSolutionJoin) { // we do not need this extra node. we can make the predecessor the delta // sanity check the node and connection if (siSolutionDeltaCandidate.getDriverStrategy() != DriverStrategy.UNARY_NO_OP || siSolutionDeltaCandidate.getInput().getLocalStrategy() != LocalStrategy.NONE) { throw new CompilerException("Invalid Solution set delta node."); } solutionSetCandidate = siSolutionDeltaCandidate.getInput().getSource(); immediateDeltaUpdate = true; } else { // was not partitioned, we need to keep this node. // mark that we materialize the input siSolutionDeltaCandidate.getInput().setTempMode(TempMode.PIPELINE_BREAKER); immediateDeltaUpdate = false; } WorksetIterationPlanNode wsNode = new WorksetIterationPlanNode( this, "WorksetIteration (" + this.getPactContract().getName() + ")", solutionSetIn, worksetIn, sspn, wspn, worksetCandidate, solutionSetCandidate); wsNode.setImmediateSolutionSetUpdate(immediateDeltaUpdate); wsNode.initProperties(gp, lp); target.add(wsNode); } } } }