public static RedistFeatureStep buildLookupJoinRedist( PlannerContext pc, RedistFeatureStep lookupTable, List<ExpressionNode> lookupJoinColumns, PEStorageGroup targetGroup, JoinEntry origEntry, List<ExpressionNode> origJoinColumns, PartitionEntry nonLookupSide) throws PEException { // we will modify the existing non lookup side projecting feature step, and add the lookup table // as a requirement to it. ProjectingFeatureStep nonLookupStep = (ProjectingFeatureStep) nonLookupSide.getStep(null); nonLookupSide.maybeForceDoublePrecision(nonLookupStep); SelectStatement lookupSelect = lookupTable.getTargetTempTable().buildSelect(pc.getContext()); SelectStatement nonLookupSelect = (SelectStatement) nonLookupStep.getPlannedStatement(); SelectStatement actualJoinStatement = DMLStatementUtils.compose(pc.getContext(), nonLookupSelect, lookupSelect); List<ExpressionNode> ands = ExpressionUtils.decomposeAndClause(actualJoinStatement.getWhereClause()); // instead of building the join spec directly, map forward the original join condition from the // joined table if it's available IndexCollector ic = new IndexCollector(); if (origEntry.getJoin().getJoin() != null) { FunctionCall mapped = (FunctionCall) actualJoinStatement .getMapper() .copyForward(origEntry.getJoin().getJoin().getJoinOn()); ands.add(mapped); ListSet<ColumnInstance> cols = ColumnInstanceCollector.getColumnInstances(mapped); for (ColumnInstance ci : cols) ic.addColumnInstance(ci); } else { Map<RewriteKey, ExpressionNode> projEntries = null; for (int i = 0; i < origJoinColumns.size(); i++) { ColumnKey mck = actualJoinStatement.getMapper().mapExpressionToColumn(origJoinColumns.get(i)); ColumnKey muck = actualJoinStatement.getMapper().mapExpressionToColumn(lookupJoinColumns.get(i)); ExpressionNode mc = null; ExpressionNode muc = null; if (mck == null || muck == null) { if (projEntries == null) { projEntries = new HashMap<RewriteKey, ExpressionNode>(); for (ExpressionNode en : actualJoinStatement.getProjectionEdge()) { 
ExpressionNode actual = ExpressionUtils.getTarget(en); if (actual instanceof ColumnInstance) { projEntries.put(((ColumnInstance) actual).getColumnKey(), actual); } else { projEntries.put(new ExpressionKey(actual), actual); } } } if (mck == null) mc = (ExpressionNode) projEntries.get(new ExpressionKey(origJoinColumns.get(i))).copy(null); if (muck == null) mc = (ExpressionNode) projEntries.get(new ExpressionKey(lookupJoinColumns.get(i))).copy(null); } if (mc == null) mc = mck.toInstance(); if (muc == null) muc = muck.toInstance(); if (mc instanceof ColumnInstance) ic.addColumnInstance((ColumnInstance) mc); if (muc instanceof ColumnInstance) ic.addColumnInstance((ColumnInstance) muc); FunctionCall eq = new FunctionCall(FunctionName.makeEquals(), mc, muc); ands.add(eq); } } ic.setIndexes(origEntry.getSchemaContext()); TempTable lookupEntryTarget = lookupTable.getTargetTempTable(); // everything from the lhs that's in the projection should be cleared - it's invisible // note that we do it after building the where clause so that the original join condition can be // mapped for (Iterator<ExpressionNode> iter = actualJoinStatement.getProjectionEdge().iterator(); iter.hasNext(); ) { ExpressionNode en = iter.next(); ExpressionNode targ = ExpressionUtils.getTarget(en); if (targ instanceof ColumnInstance) { ColumnInstance ci = (ColumnInstance) targ; if (ci.getTableInstance().getAbstractTable() == lookupEntryTarget) iter.remove(); } } actualJoinStatement.setWhereClause(ExpressionUtils.safeBuildAnd(ands)); actualJoinStatement.normalize(origEntry.getSchemaContext()); // build a new projecting feature step ProjectingFeatureStep lookupJoinStep = DefaultFeatureStepBuilder.INSTANCE.buildProjectingStep( origEntry.getPlannerContext(), origEntry.getFeaturePlanner(), actualJoinStatement, new ExecutionCost(false, false, null, -1), nonLookupStep.getSourceGroup(), actualJoinStatement.getDatabase(origEntry.getSchemaContext()), nonLookupStep.getDistributionVector(), null, 
DMLExplainReason.LOOKUP_JOIN.makeRecord()); // arrange for the children of the nonlookup side to become my children // and add the lookup table step as well lookupJoinStep.getSelfChildren().addAll(nonLookupStep.getAllChildren()); lookupJoinStep.getSelfChildren().add(lookupTable); // children must be sequential - no need to modify here List<Integer> mappedRedistOn = nonLookupSide.mapDistributedOn(origJoinColumns, actualJoinStatement); // now remove the lookup table from the mapper - have to do this pretty late - at this point // the query won't be manipulated any more actualJoinStatement.getMapper().remove(lookupEntryTarget); RedistFeatureStep out = lookupJoinStep.redist( origEntry.getPlannerContext(), origEntry.getFeaturePlanner(), new TempTableCreateOptions(Model.STATIC, targetGroup) .distributeOn(mappedRedistOn) .withRowCount(origEntry.getScore().getRowCount()), null, DMLExplainReason.LOOKUP_JOIN.makeRecord()); return out; }
/**
 * Builds the projection for a join on common (USING-style) columns, with each set of same-named
 * join columns collapsed into a single COALESCE() expression.
 *
 * <p>The resulting column order follows standard SQL:
 *
 * <ol>
 *   <li>the coalesced common columns of the joined tables, in the order in which they occur in
 *       the first table;
 *   <li>the columns unique to the first table, in that table's order;
 *   <li>the columns unique to the second table, in that table's order.
 * </ol>
 *
 * @param sc schema context, handed through to {@code addColumns}
 * @param tablesAndJoinColumns the joined table instances, each with its join columns; iterated
 *     in the map's own order
 * @return the projection expressions in their final order
 */
private Collection<ExpressionNode> buildProjection(
    final SchemaContext sc, final Map<TableInstance, Set<ColumnKey>> tablesAndJoinColumns) {
  final ListSetMap<ColumnKey, ExpressionNode> projection =
      new ListSetMap<ColumnKey, ExpressionNode>();

  /*
   * Join columns are coalesced by their unqualified names, so the lookup
   * map below orders (and identifies) its keys by column name only.
   */
  final Comparator<ColumnKey> byColumnName =
      new Comparator<ColumnKey>() {
        @Override
        public int compare(ColumnKey left, ColumnKey right) {
          return getColumnName(left).get().compareTo(getColumnName(right).get());
        }
      };
  final Map<ColumnKey, Set<ColumnInstance>> toCoalesceName =
      new TreeMap<ColumnKey, Set<ColumnInstance>>(byColumnName);

  /*
   * Build the projection. The first join column seen for a given name is
   * inserted into the projection; every column with that name (including
   * the first) is collected into the name's set for coalescing.
   */
  for (final Map.Entry<TableInstance, Set<ColumnKey>> tableEntry :
      tablesAndJoinColumns.entrySet()) {
    final Set<ColumnKey> joinColumnsOfTable = tableEntry.getValue();
    for (final ColumnKey joinColumn : joinColumnsOfTable) {
      final ColumnInstance instance = joinColumn.toInstance();
      Set<ColumnInstance> sameNamed = toCoalesceName.get(joinColumn);
      if (sameNamed == null) {
        projection.put(joinColumn, instance);
        sameNamed = new HashSet<ColumnInstance>();
        toCoalesceName.put(joinColumn, sameNamed);
      }
      sameNamed.add(instance);
    }
    addColumns(sc, projection, tableEntry.getKey(), joinColumnsOfTable);
  }

  /*
   * Nothing to coalesce - the projection is already final.
   */
  if (toCoalesceName.isEmpty()) {
    return projection.values();
  }

  /*
   * It turns out that in case of multi-join statements MySQL also sorts
   * adjacent join columns by the number of coalesced columns (by the
   * frequency with which they appear in the USING clauses).
   *
   * i.e.
   * "SELECT * FROM pe251C c RIGHT JOIN pe251D d USING (id1,id2,id3) LEFT OUTER JOIN pe251E e USING (id1,id3) ORDER BY c.id1;"
   *
   * Although the first-table order would be (id2, id1, id3):
   * "COALESCE(id2, id2), COALESCE(id1, id1, id1), COALESCE(id3, id3, id3)"
   *
   * MySQL sorts the columns as (id1, id3, id2):
   * "COALESCE(id1, id1, id1), COALESCE(id3, id3, id3), COALESCE(id2, id2)"
   *
   * Hence a stable insertion sort over runs of adjacent coalesced columns,
   * larger sets first.
   */
  final Map<ColumnKey, Set<ColumnInstance>> toCoalesceByEntry =
      new HashMap<ColumnKey, Set<ColumnInstance>>(toCoalesceName);
  final int endIndex = projection.size();
  for (int i = 0; i < endIndex; ++i) {
    if (!toCoalesceByEntry.containsKey(projection.getEntryAt(i).getKey())) {
      continue;
    }
    // bubble the entry at i towards the front while its predecessor is a smaller coalesced set
    for (int j = i; j > 0; --j) {
      final ColumnKey current = projection.getEntryAt(j).getKey();
      final ColumnKey previous = projection.getEntryAt(j - 1).getKey();
      final Set<ColumnInstance> currentSet = toCoalesceByEntry.get(current);
      final Set<ColumnInstance> previousSet = toCoalesceByEntry.get(previous);
      if (currentSet == null || previousSet == null) {
        break;
      }
      if (currentSet.size() <= previousSet.size()) {
        break;
      }
      projection.swap(current, previous);
    }
  }

  /*
   * Last step: replace each coalesced column in the projection with an
   * aliased COALESCE() call over its set of same-named columns.
   */
  for (final Map.Entry<ColumnKey, Set<ColumnInstance>> named : toCoalesceName.entrySet()) {
    final ColumnKey column = named.getKey();
    final FunctionCall coalesceCall =
        new FunctionCall(
            FunctionName.makeCoalesce(), new ArrayList<ExpressionNode>(named.getValue()));
    final ExpressionAlias aliased =
        new ExpressionAlias(coalesceCall, new NameAlias(getColumnName(column)), false);
    projection.put(column, aliased);
  }

  return projection.values();
}