public static RedistFeatureStep buildLookupJoinRedist( PlannerContext pc, RedistFeatureStep lookupTable, List<ExpressionNode> lookupJoinColumns, PEStorageGroup targetGroup, JoinEntry origEntry, List<ExpressionNode> origJoinColumns, PartitionEntry nonLookupSide) throws PEException { // we will modify the existing non lookup side projecting feature step, and add the lookup table // as a requirement to it. ProjectingFeatureStep nonLookupStep = (ProjectingFeatureStep) nonLookupSide.getStep(null); nonLookupSide.maybeForceDoublePrecision(nonLookupStep); SelectStatement lookupSelect = lookupTable.getTargetTempTable().buildSelect(pc.getContext()); SelectStatement nonLookupSelect = (SelectStatement) nonLookupStep.getPlannedStatement(); SelectStatement actualJoinStatement = DMLStatementUtils.compose(pc.getContext(), nonLookupSelect, lookupSelect); List<ExpressionNode> ands = ExpressionUtils.decomposeAndClause(actualJoinStatement.getWhereClause()); // instead of building the join spec directly, map forward the original join condition from the // joined table if it's available IndexCollector ic = new IndexCollector(); if (origEntry.getJoin().getJoin() != null) { FunctionCall mapped = (FunctionCall) actualJoinStatement .getMapper() .copyForward(origEntry.getJoin().getJoin().getJoinOn()); ands.add(mapped); ListSet<ColumnInstance> cols = ColumnInstanceCollector.getColumnInstances(mapped); for (ColumnInstance ci : cols) ic.addColumnInstance(ci); } else { Map<RewriteKey, ExpressionNode> projEntries = null; for (int i = 0; i < origJoinColumns.size(); i++) { ColumnKey mck = actualJoinStatement.getMapper().mapExpressionToColumn(origJoinColumns.get(i)); ColumnKey muck = actualJoinStatement.getMapper().mapExpressionToColumn(lookupJoinColumns.get(i)); ExpressionNode mc = null; ExpressionNode muc = null; if (mck == null || muck == null) { if (projEntries == null) { projEntries = new HashMap<RewriteKey, ExpressionNode>(); for (ExpressionNode en : actualJoinStatement.getProjectionEdge()) { ExpressionNode actual = ExpressionUtils.getTarget(en); if (actual instanceof ColumnInstance) { projEntries.put(((ColumnInstance) actual).getColumnKey(), actual); } else { projEntries.put(new ExpressionKey(actual), actual); } } } if (mck == null) mc = (ExpressionNode) projEntries.get(new ExpressionKey(origJoinColumns.get(i))).copy(null); if (muck == null) mc = (ExpressionNode) projEntries.get(new ExpressionKey(lookupJoinColumns.get(i))).copy(null); } if (mc == null) mc = mck.toInstance(); if (muc == null) muc = muck.toInstance(); if (mc instanceof ColumnInstance) ic.addColumnInstance((ColumnInstance) mc); if (muc instanceof ColumnInstance) ic.addColumnInstance((ColumnInstance) muc); FunctionCall eq = new FunctionCall(FunctionName.makeEquals(), mc, muc); ands.add(eq); } } ic.setIndexes(origEntry.getSchemaContext()); TempTable lookupEntryTarget = lookupTable.getTargetTempTable(); // everything from the lhs that's in the projection should be cleared - it's invisible // note that we do it after building the where clause so that the original join condition can be // mapped for (Iterator<ExpressionNode> iter = actualJoinStatement.getProjectionEdge().iterator(); iter.hasNext(); ) { ExpressionNode en = iter.next(); ExpressionNode targ = ExpressionUtils.getTarget(en); if (targ instanceof ColumnInstance) { ColumnInstance ci = (ColumnInstance) targ; if (ci.getTableInstance().getAbstractTable() == lookupEntryTarget) iter.remove(); } } actualJoinStatement.setWhereClause(ExpressionUtils.safeBuildAnd(ands)); actualJoinStatement.normalize(origEntry.getSchemaContext()); // build a new projecting feature step ProjectingFeatureStep lookupJoinStep = DefaultFeatureStepBuilder.INSTANCE.buildProjectingStep( origEntry.getPlannerContext(), origEntry.getFeaturePlanner(), actualJoinStatement, new ExecutionCost(false, false, null, -1), nonLookupStep.getSourceGroup(), actualJoinStatement.getDatabase(origEntry.getSchemaContext()), nonLookupStep.getDistributionVector(), null, DMLExplainReason.LOOKUP_JOIN.makeRecord()); // arrange for the children of the nonlookup side to become my children // and add the lookup table step as well lookupJoinStep.getSelfChildren().addAll(nonLookupStep.getAllChildren()); lookupJoinStep.getSelfChildren().add(lookupTable); // children must be sequential - no need to modify here List<Integer> mappedRedistOn = nonLookupSide.mapDistributedOn(origJoinColumns, actualJoinStatement); // now remove the lookup table from the mapper - have to do this pretty late - at this point // the query won't be manipulated any more actualJoinStatement.getMapper().remove(lookupEntryTarget); RedistFeatureStep out = lookupJoinStep.redist( origEntry.getPlannerContext(), origEntry.getFeaturePlanner(), new TempTableCreateOptions(Model.STATIC, targetGroup) .distributeOn(mappedRedistOn) .withRowCount(origEntry.getScore().getRowCount()), null, DMLExplainReason.LOOKUP_JOIN.makeRecord()); return out; }
@Override public JoinedPartitionEntry build() throws PEException { ExecutionCost combinedCost = buildCombinedCost(); PEStorageGroup tempGroup = getPlannerContext().getTempGroupManager().getGroup(combinedCost.getGroupScore()); StrategyTable constrained = null; StrategyTable unconstrained = null; if (constrainedSide == left) { constrained = left; unconstrained = right; } else { constrained = right; unconstrained = left; } // we've found the constrained and unconstrained sides - now find the columns in the join List<ExpressionNode> constrainedJC = constrained .getEntry() .mapDistributedOn(rje.getJoin().getRedistJoinExpressions(constrained.getSingleTable())); List<ExpressionNode> unconstrainedJC = unconstrained .getEntry() .mapDistributedOn( rje.getJoin().getRedistJoinExpressions(unconstrained.getSingleTable())); // redistribute the constrained side onto the temp group. RedistFeatureStep redistConstrained = colocateViaRedist( getPlannerContext(), getJoin(), constrained.getSingleTable(), constrained.getEntry(), Model.STATIC, tempGroup, true, explain, rje.getFeaturePlanner()); // the constrained side is represented by this temp table constrained.getEntry().setStep(redistConstrained); // next, we have to build the select distinct(id) redist'd to the pg RedistFeatureStep lookupTableStep = buildLookupTableRedist( getPlannerContext(), constrained.getEntry(), redistConstrained, rje, unconstrained.getGroup(), true); // now we have the unconstrained side still on the pers group // and we have a new temp table (lookupTableStep) also on the pers group. // this temp table is the result of the join between the lookup table // and the unconstrained side. the unconstrained entry will be represented by // this temp table. RedistFeatureStep redistUnconstrained = buildLookupJoinRedist( getPlannerContext(), lookupTableStep, constrainedJC, tempGroup, rje, unconstrainedJC, unconstrained.getEntry()); unconstrained.getEntry().setStep(redistUnconstrained); // so now we have redistConstrained and redistUnconstrained, we can build the ipe. // the two RedistFeatureSteps end up being the children for the new ipe, but we need to build // out the join kernel. // figure out which is left and right now PartitionEntry leftEntry = null; PartitionEntry rightEntry = null; if (constrainedSide == left) { leftEntry = constrained.getEntry(); rightEntry = unconstrained.getEntry(); } else { leftEntry = unconstrained.getEntry(); rightEntry = constrained.getEntry(); } RedistFeatureStep leftStep = (RedistFeatureStep) leftEntry.getStep(null); RedistFeatureStep rightStep = (RedistFeatureStep) rightEntry.getStep(null); return buildResultEntry( leftStep.buildNewSelect(getPlannerContext()), leftStep.getTargetTempTable().getDistributionVector(getSchemaContext()), rightStep.buildNewSelect(getPlannerContext()), rightStep.getTargetTempTable().getDistributionVector(getSchemaContext()), combinedCost, tempGroup, false // never parallel because we have an ordering issue ); }
// we have the temp table on the temp group for the constrained side // we need to build the bcast temp table on the pers group public static RedistFeatureStep buildLookupTableRedist( PlannerContext pc, PartitionEntry srcEntry, RedistFeatureStep constrainedOnTempGroup, JoinEntry rje, PEStorageGroup targetGroup, boolean indexJoinColumns) throws PEException { ProjectingFeatureStep selectDistinct = constrainedOnTempGroup.buildNewProjectingStep( pc, rje.getFeaturePlanner(), null, DMLExplainReason.LOOKUP_JOIN_LOOKUP_TABLE.makeRecord()); SelectStatement ss = (SelectStatement) selectDistinct.getPlannedStatement(); DistributionVector distVect = constrainedOnTempGroup.getTargetTempTable().getDistributionVector(pc.getContext()); ListSet<ColumnKey> mappedColumnsInJoin = null; if (rje.getJoin().getJoin() != null) { ListSet<ColumnKey> columnsInJoin = ColumnInstanceCollector.getColumnKeys( ColumnInstanceCollector.getColumnInstances(rje.getJoin().getJoin().getJoinOn())); // build the set of columns in the src entry projection ListSet<ColumnKey> srcColumns = new ListSet<ColumnKey>(); for (BufferEntry be : srcEntry.getBufferEntries()) { ExpressionNode targ = ExpressionUtils.getTarget(be.getTarget()); if (targ instanceof ColumnInstance) { ColumnInstance ci = (ColumnInstance) targ; srcColumns.add(ci.getColumnKey()); } } columnsInJoin.retainAll(srcColumns); mappedColumnsInJoin = new ListSet<ColumnKey>(); for (ColumnKey ck : columnsInJoin) { mappedColumnsInJoin.add(ss.getMapper().copyColumnKeyForward(ck)); } } for (Iterator<ExpressionNode> iter = ss.getProjectionEdge().iterator(); iter.hasNext(); ) { ExpressionNode en = iter.next(); if (en instanceof ColumnInstance) { ColumnInstance ci = (ColumnInstance) en; PEColumn pec = ci.getPEColumn(); if (!distVect.contains(srcEntry.getSchemaContext(), pec) && (mappedColumnsInJoin == null || !mappedColumnsInJoin.contains(ci.getColumnKey()))) iter.remove(); } } ss.setSetQuantifier(SetQuantifier.DISTINCT); ss.normalize(srcEntry.getSchemaContext()); // now that we have the select distinct set up // redistribute it bcast back onto the pers group RedistFeatureStep out = selectDistinct.redist( pc, rje.getFeaturePlanner(), new TempTableCreateOptions(Model.BROADCAST, targetGroup), null, DMLExplainReason.LOOKUP_JOIN_LOOKUP_TABLE.makeRecord()); if (indexJoinColumns) out.getTargetTempTable() .noteJoinedColumns(pc.getContext(), out.getTargetTempTable().getColumns(pc.getContext())); return out; }