/** * Compute a logical, reasonably efficient join on the specified tables. See project description * for hints on how this should be implemented. * * @param stats Statistics for each table involved in the join, referenced by base table names, * not alias * @param filterSelectivities Selectivities of the filter predicates on each table in the join, * referenced by table alias (if no alias, the base table name) * @param explain Indicates whether your code should explain its query plan or simply execute it * @return A Vector<LogicalJoinNode> that stores joins in the left-deep order in which they should * be executed. * @throws ParsingException when stats or filter selectivities is missing a table in the join, or * or when another internal error occurs */ public Vector<LogicalJoinNode> orderJoins( HashMap<String, TableStats> stats, HashMap<String, Double> filterSelectivities, boolean explain) throws ParsingException { // See the project writeup for some hints as to how this function // should work. // some code goes here PlanCache pc = new PlanCache(); for (int i = 1; i <= joins.size(); i++) { Set<Set<LogicalJoinNode>> subsets = enumerateSubsets(joins, i); for (Set<LogicalJoinNode> subset : subsets) { CostCard best = new CostCard(); best.cost = Double.MAX_VALUE; for (LogicalJoinNode node : subset) { CostCard subCard = computeCostAndCardOfSubplan(stats, filterSelectivities, node, subset, best.cost, pc); if (subCard != null) { if (subCard.cost < best.cost) best = subCard; pc.addPlan(subset, best.cost, best.card, best.plan); } } } } if (explain) printJoins(joins, pc, stats, filterSelectivities); HashSet<LogicalJoinNode> joinSet = new HashSet<LogicalJoinNode>(); for (int i = 0; i < joins.size(); i++) { joinSet.add(joins.get(i)); } Vector<LogicalJoinNode> order = pc.getOrder(joinSet); if (order != null) return order; return joins; }
/** * Find the table in the plan, and return the cost and cardinality of the intermediate joined * table during the execution */ private CostCard getCostCard( Vector<LogicalJoinNode> plan, Vector<Integer> planCardinalities, Vector<Double> planCosts, String table) { for (int i = plan.size() - 1; i >= 0; i--) { LogicalJoinNode j = plan.get(i); if (j.t1Alias.equals(table) || (j.t2Alias != null && j.t2Alias.equals(table))) { CostCard c = new CostCard(); c.card = planCardinalities.get(i); c.cost = planCosts.get(i); return c; } } return null; }
/** * This is a helper method that computes the cost and cardinality of joining joinToRemove to * joinSet (joinSet should contain joinToRemove), given that all of the subsets of size * joinSet.size() - 1 have already been computed and stored in PlanCache pc. * * @param stats table stats for all of the tables, referenced by table names rather than alias * (see {@link #orderJoins}) * @param filterSelectivities the selectivities of the filters over each of the tables (where * tables are indentified by their alias or name if no alias is given) * @param joinToRemove the join to remove from joinSet * @param joinSet the set of joins being considered * @param bestCostSoFar the best way to join joinSet so far (minimum of previous invocations of * computeCostAndCardOfSubplan for this joinSet, from returned CostCard) * @param pc the PlanCache for this join; should have subplans for all plans of size * joinSet.size()-1 * @return A {@link CostCard} objects desribing the cost, cardinality, optimal subplan * @throws ParsingException when stats, filterSelectivities, or pc object is missing tables * involved in join */ @SuppressWarnings("unchecked") private CostCard computeCostAndCardOfSubplan( HashMap<String, TableStats> stats, HashMap<String, Double> filterSelectivities, LogicalJoinNode joinToRemove, Set<LogicalJoinNode> joinSet, double bestCostSoFar, PlanCache pc) throws ParsingException { LogicalJoinNode j = joinToRemove; Vector<LogicalJoinNode> prevBest; if (this.p.getTableId(j.t1Alias) == null) throw new ParsingException("Unknown table " + j.t1Alias); if (this.p.getTableId(j.t2Alias) == null) throw new ParsingException("Unknown table " + j.t2Alias); String table1Name = Database.getCatalog().getTableName(this.p.getTableId(j.t1Alias)); String table2Name = Database.getCatalog().getTableName(this.p.getTableId(j.t2Alias)); String table1Alias = j.t1Alias; String table2Alias = j.t2Alias; Set<LogicalJoinNode> news = (Set<LogicalJoinNode>) ((HashSet<LogicalJoinNode>) joinSet).clone(); news.remove(j); double t1cost, t2cost; int t1card, t2card; boolean leftPkey, rightPkey; if (news.isEmpty()) { // base case -- both are base relations prevBest = new Vector<LogicalJoinNode>(); t1cost = stats.get(table1Name).estimateScanCost(); t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias)); leftPkey = isPkey(j.t1Alias, j.f1PureName); t2cost = table2Alias == null ? 0 : stats.get(table2Name).estimateScanCost(); t2card = table2Alias == null ? 0 : stats.get(table2Name).estimateTableCardinality(filterSelectivities.get(j.t2Alias)); rightPkey = table2Alias == null ? false : isPkey(table2Alias, j.f2PureName); } else { // news is not empty -- figure best way to join j to news prevBest = pc.getOrder(news); // possible that we have not cached an answer, if subset // includes a cross product if (prevBest == null) { return null; } double prevBestCost = pc.getCost(news); int bestCard = pc.getCard(news); // estimate cost of right subtree if (doesJoin(prevBest, table1Alias)) { // j.t1 is in prevBest t1cost = prevBestCost; // left side just has cost of whatever // left // subtree is t1card = bestCard; leftPkey = hasPkey(prevBest); t2cost = j.t2Alias == null ? 0 : stats.get(table2Name).estimateScanCost(); t2card = j.t2Alias == null ? 0 : stats .get(table2Name) .estimateTableCardinality(filterSelectivities.get(j.t2Alias)); rightPkey = j.t2Alias == null ? false : isPkey(j.t2Alias, j.f2PureName); } else if (doesJoin(prevBest, j.t2Alias)) { // j.t2 is in prevbest // (both // shouldn't be) t2cost = prevBestCost; // left side just has cost of whatever // left // subtree is t2card = bestCard; rightPkey = hasPkey(prevBest); t1cost = stats.get(table1Name).estimateScanCost(); t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias)); leftPkey = isPkey(j.t1Alias, j.f1PureName); } else { // don't consider this plan if one of j.t1 or j.t2 // isn't a table joined in prevBest (cross product) return null; } } // case where prevbest is left double cost1 = estimateJoinCost(j, t1card, t2card, t1cost, t2cost); LogicalJoinNode j2 = j.swapInnerOuter(); double cost2 = estimateJoinCost(j2, t2card, t1card, t2cost, t1cost); if (cost2 < cost1) { boolean tmp; j = j2; cost1 = cost2; tmp = rightPkey; rightPkey = leftPkey; leftPkey = tmp; } if (cost1 >= bestCostSoFar) return null; CostCard cc = new CostCard(); cc.card = estimateJoinCardinality(j, t1card, t2card, leftPkey, rightPkey, stats); cc.cost = cost1; cc.plan = (Vector<LogicalJoinNode>) prevBest.clone(); cc.plan.addElement(j); // prevbest is left -- add new join to end return cc; }
/** * This is a helper method that computes the cost and cardinality of joining a LogicalJoinNode j * to the current greedy plan we have built up. * * @param j the join to try adding to our plan * @param plan the current plan we have built so far from the greedy algorithm, a Vector of * LogicalJoinNodes that we've so far chosen. * @param planCardinalities given the join order from plan, we also keep track of how large joined * tables are, so we can help estimate the cardinality and cost of this next join * @param planCosts given the join order from plan, we also keep track of how expensive executing * some joins are, so we can help estimate the cardinality and cost of this next join * @param stats table stats for all of the tables, referenced by table names rather than alias * (see {@link #orderGreedyJoins(HashMap, HashMap)}) * @param filterSelectivities the selectivities of the filters over each of the tables (where * tables are indentified by their alias or name if no alias is given) * @return A {@link CostCard} objects desribing the cost, cardinality, optimal subplan * @throws ParsingException when stats, filterSelectivities, or pc object is missing tables * involved in join */ private CostCard costGreedyJoin( LogicalJoinNode j, Vector<LogicalJoinNode> plan, Vector<Integer> planCardinalities, Vector<Double> planCosts, HashMap<String, TableStats> stats, HashMap<String, Double> filterSelectivities) throws ParsingException { if (this.p.getTableId(j.t1Alias) == null) throw new ParsingException("Unknown table " + j.t1Alias); if (this.p.getTableId(j.t2Alias) == null) throw new ParsingException("Unknown table " + j.t2Alias); String table1Name = Database.getCatalog().getTableName(this.p.getTableId(j.t1Alias)); String table2Name = Database.getCatalog().getTableName(this.p.getTableId(j.t2Alias)); String table1Alias = j.t1Alias; String table2Alias = j.t2Alias; double t1cost, t2cost; int t1card, t2card; boolean leftPkey, rightPkey; // estimate cost of right subtree if (doesJoin(plan, table1Alias)) { // j.t1 is in plan already CostCard c = getCostCard(plan, planCardinalities, planCosts, table1Alias); t1cost = c.cost; // left side just has cost of whatever left subtree is t1card = c.card; leftPkey = hasPkey(plan); t2cost = j.t2Alias == null ? 0 : stats.get(table2Name).estimateScanCost(); t2card = j.t2Alias == null ? 0 : stats.get(table2Name).estimateTableCardinality(filterSelectivities.get(j.t2Alias)); rightPkey = j.t2Alias == null ? false : isPkey(j.t2Alias, j.f2PureName); } else if (doesJoin(plan, j.t2Alias)) { // j.t2 is in plan // (else if since both j.t1 and j.t2 shouldn't both be) CostCard c = getCostCard(plan, planCardinalities, planCosts, table2Alias); t2cost = c.cost; t2card = c.card; rightPkey = hasPkey(plan); t1cost = stats.get(table1Name).estimateScanCost(); t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias)); leftPkey = isPkey(j.t1Alias, j.f1PureName); } else { // Neither is a plan, both are just single tables t1cost = stats.get(table1Name).estimateScanCost(); t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias)); leftPkey = isPkey(j.t1Alias, j.f1PureName); t2cost = table2Alias == null ? 0 : stats.get(table2Name).estimateScanCost(); t2card = table2Alias == null ? 0 : stats.get(table2Name).estimateTableCardinality(filterSelectivities.get(j.t2Alias)); rightPkey = table2Alias == null ? false : isPkey(table2Alias, j.f2PureName); } double cost1 = estimateJoinCost(j, t1card, t2card, t1cost, t2cost); LogicalJoinNode j2 = j.swapInnerOuter(); double cost2 = estimateJoinCost(j2, t2card, t1card, t2cost, t1cost); if (cost2 < cost1) { boolean tmp; j = j2; cost1 = cost2; tmp = rightPkey; rightPkey = leftPkey; leftPkey = tmp; } CostCard cc = new CostCard(); cc.card = estimateJoinCardinality(j, t1card, t2card, leftPkey, rightPkey, stats); cc.cost = cost1; return cc; }