Beispiel #1
1
  /**
   * Compute a logical, reasonably efficient join on the specified tables. See project description
   * for hints on how this should be implemented.
   *
   * @param stats Statistics for each table involved in the join, referenced by base table names,
   *     not alias
   * @param filterSelectivities Selectivities of the filter predicates on each table in the join,
   *     referenced by table alias (if no alias, the base table name)
   * @param explain Indicates whether your code should explain its query plan or simply execute it
   * @return A Vector<LogicalJoinNode> that stores joins in the left-deep order in which they should
   *     be executed.
   * @throws ParsingException when stats or filter selectivities is missing a table in the join, or
   *     or when another internal error occurs
   */
  public Vector<LogicalJoinNode> orderJoins(
      HashMap<String, TableStats> stats,
      HashMap<String, Double> filterSelectivities,
      boolean explain)
      throws ParsingException {

    // See the project writeup for some hints as to how this function
    // should work.

    // some code goes here
    PlanCache pc = new PlanCache();
    for (int i = 1; i <= joins.size(); i++) {
      Set<Set<LogicalJoinNode>> subsets = enumerateSubsets(joins, i);
      for (Set<LogicalJoinNode> subset : subsets) {
        CostCard best = new CostCard();
        best.cost = Double.MAX_VALUE;
        for (LogicalJoinNode node : subset) {
          CostCard subCard =
              computeCostAndCardOfSubplan(stats, filterSelectivities, node, subset, best.cost, pc);
          if (subCard != null) {
            if (subCard.cost < best.cost) best = subCard;
            pc.addPlan(subset, best.cost, best.card, best.plan);
          }
        }
      }
    }
    if (explain) printJoins(joins, pc, stats, filterSelectivities);
    HashSet<LogicalJoinNode> joinSet = new HashSet<LogicalJoinNode>();
    for (int i = 0; i < joins.size(); i++) {
      joinSet.add(joins.get(i));
    }
    Vector<LogicalJoinNode> order = pc.getOrder(joinSet);
    if (order != null) return order;
    return joins;
  }
Beispiel #2
0
  /**
   * This is a helper method that computes the cost and cardinality of joining joinToRemove to
   * joinSet (joinSet should contain joinToRemove), given that all of the subsets of size
   * joinSet.size() - 1 have already been computed and stored in PlanCache pc.
   *
   * @param stats table stats for all of the tables, referenced by table names rather than alias
   *     (see {@link #orderJoins})
   * @param filterSelectivities the selectivities of the filters over each of the tables (where
   *     tables are indentified by their alias or name if no alias is given)
   * @param joinToRemove the join to remove from joinSet
   * @param joinSet the set of joins being considered
   * @param bestCostSoFar the best way to join joinSet so far (minimum of previous invocations of
   *     computeCostAndCardOfSubplan for this joinSet, from returned CostCard)
   * @param pc the PlanCache for this join; should have subplans for all plans of size
   *     joinSet.size()-1
   * @return A {@link CostCard} objects desribing the cost, cardinality, optimal subplan
   * @throws ParsingException when stats, filterSelectivities, or pc object is missing tables
   *     involved in join
   */
  @SuppressWarnings("unchecked")
  private CostCard computeCostAndCardOfSubplan(
      HashMap<String, TableStats> stats,
      HashMap<String, Double> filterSelectivities,
      LogicalJoinNode joinToRemove,
      Set<LogicalJoinNode> joinSet,
      double bestCostSoFar,
      PlanCache pc)
      throws ParsingException {

    LogicalJoinNode j = joinToRemove;

    Vector<LogicalJoinNode> prevBest;

    if (this.p.getTableId(j.t1Alias) == null)
      throw new ParsingException("Unknown table " + j.t1Alias);
    if (this.p.getTableId(j.t2Alias) == null)
      throw new ParsingException("Unknown table " + j.t2Alias);

    String table1Name = Database.getCatalog().getTableName(this.p.getTableId(j.t1Alias));
    String table2Name = Database.getCatalog().getTableName(this.p.getTableId(j.t2Alias));
    String table1Alias = j.t1Alias;
    String table2Alias = j.t2Alias;

    Set<LogicalJoinNode> news = (Set<LogicalJoinNode>) ((HashSet<LogicalJoinNode>) joinSet).clone();
    news.remove(j);

    double t1cost, t2cost;
    int t1card, t2card;
    boolean leftPkey, rightPkey;

    if (news.isEmpty()) { // base case -- both are base relations
      prevBest = new Vector<LogicalJoinNode>();
      t1cost = stats.get(table1Name).estimateScanCost();
      t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias));
      leftPkey = isPkey(j.t1Alias, j.f1PureName);

      t2cost = table2Alias == null ? 0 : stats.get(table2Name).estimateScanCost();
      t2card =
          table2Alias == null
              ? 0
              : stats.get(table2Name).estimateTableCardinality(filterSelectivities.get(j.t2Alias));
      rightPkey = table2Alias == null ? false : isPkey(table2Alias, j.f2PureName);
    } else {
      // news is not empty -- figure best way to join j to news
      prevBest = pc.getOrder(news);

      // possible that we have not cached an answer, if subset
      // includes a cross product
      if (prevBest == null) {
        return null;
      }

      double prevBestCost = pc.getCost(news);
      int bestCard = pc.getCard(news);

      // estimate cost of right subtree
      if (doesJoin(prevBest, table1Alias)) { // j.t1 is in prevBest
        t1cost = prevBestCost; // left side just has cost of whatever
        // left
        // subtree is
        t1card = bestCard;
        leftPkey = hasPkey(prevBest);

        t2cost = j.t2Alias == null ? 0 : stats.get(table2Name).estimateScanCost();
        t2card =
            j.t2Alias == null
                ? 0
                : stats
                    .get(table2Name)
                    .estimateTableCardinality(filterSelectivities.get(j.t2Alias));
        rightPkey = j.t2Alias == null ? false : isPkey(j.t2Alias, j.f2PureName);
      } else if (doesJoin(prevBest, j.t2Alias)) { // j.t2 is in prevbest
        // (both
        // shouldn't be)
        t2cost = prevBestCost; // left side just has cost of whatever
        // left
        // subtree is
        t2card = bestCard;
        rightPkey = hasPkey(prevBest);

        t1cost = stats.get(table1Name).estimateScanCost();
        t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias));
        leftPkey = isPkey(j.t1Alias, j.f1PureName);

      } else {
        // don't consider this plan if one of j.t1 or j.t2
        // isn't a table joined in prevBest (cross product)
        return null;
      }
    }

    // case where prevbest is left
    double cost1 = estimateJoinCost(j, t1card, t2card, t1cost, t2cost);

    LogicalJoinNode j2 = j.swapInnerOuter();
    double cost2 = estimateJoinCost(j2, t2card, t1card, t2cost, t1cost);
    if (cost2 < cost1) {
      boolean tmp;
      j = j2;
      cost1 = cost2;
      tmp = rightPkey;
      rightPkey = leftPkey;
      leftPkey = tmp;
    }
    if (cost1 >= bestCostSoFar) return null;

    CostCard cc = new CostCard();

    cc.card = estimateJoinCardinality(j, t1card, t2card, leftPkey, rightPkey, stats);
    cc.cost = cost1;
    cc.plan = (Vector<LogicalJoinNode>) prevBest.clone();
    cc.plan.addElement(j); // prevbest is left -- add new join to end
    return cc;
  }