Beispiel #1
0
  /**
   * Builds a {@link TableStats} object for every table id produced by the catalog's table-id
   * iterator and registers it via {@code setTableStats} under the table's catalog name. A progress
   * line is printed to standard output before and after the pass.
   */
  public static void computeStatistics() {
    final Iterator<Integer> remainingIds = Database.getCatalog().tableIdIterator();

    System.out.println("Computing table stats.");
    while (remainingIds.hasNext()) {
      final int currentId = remainingIds.next();
      // Statistics are built first, then the name is looked up, then both are registered.
      final TableStats computed = new TableStats(currentId, IOCOSTPERPAGE);
      final String tableName = Database.getCatalog().getTableName(currentId);
      setTableStats(tableName, computed);
    }
    System.out.println("Done.");
  }
Beispiel #2
0
 /**
  * Produces the schema of this scan: the catalog's TupleDesc for the scanned table, with every
  * field name rewritten to {@code alias + "." + fieldName}. Qualifying the names this way keeps
  * them distinct when tables that share a field name are joined.
  *
  * @return a new TupleDesc with the catalog's field types and alias-prefixed field names.
  */
 public TupleDesc getTupleDesc() {
   final TupleDesc base = Database.getCatalog().getTupleDesc(id);
   final int fieldCount = base.numFields();
   final Type[] fieldTypes = new Type[fieldCount];
   final String[] qualifiedNames = new String[fieldCount];

   int idx = 0;
   while (idx < fieldCount) {
     fieldTypes[idx] = base.getFieldType(idx);
     // String concatenation renders a null alias or null field name as the text "null".
     qualifiedNames[idx] = alias + "." + base.getFieldName(idx);
     idx++;
   }
   return new TupleDesc(fieldTypes, qualifiedNames);
 }
 /**
  * Opens the iterator backing this scan. When {@code i_pos} is set it is reused as the active
  * iterator; otherwise a new iterator is requested from the table's HeapFile for transaction
  * {@code tid}. In both cases the chosen iterator is then opened.
  *
  * @throws DbException propagated from the iterator's {@code open()}
  * @throws TransactionAbortedException propagated from the iterator's {@code open()}
  */
 public void open() throws DbException, TransactionAbortedException {
   if (i_pos == null) {
     // No saved iterator: fetch the table's file from the catalog and iterate it from scratch.
     HeapFile file = (HeapFile) Database.getCatalog().getDbFile(tableid);
     i = file.iterator(tid);
   } else {
     i = i_pos;
   }
   i.open();
 }
Beispiel #4
0
  /**
   * Create a new TableStats object that keeps track of statistics on each column of a table.
   *
   * <p>The constructor looks up the table's DbFile in the catalog, creates one statistics object
   * per column ({@code IntStatistics} for {@code Type.INT_TYPE} columns, {@code StringHistogram}
   * for every other column), and then scans every tuple of the file, feeding each field value into
   * the statistics object of its column while counting the tuples.
   *
   * <p>If the scan fails with a DbException or TransactionAbortedException, the stack trace is
   * printed and the object keeps whatever was accumulated up to that point; the iterator is closed
   * in either case.
   *
   * @param tableid The table over which to compute statistics
   * @param ioCostPerPage The cost per page of IO. This doesn't differentiate between
   *     sequential-scan IO and disk seeks.
   */
  public TableStats(int tableid, int ioCostPerPage) {
    this.ioCostPerPage = ioCostPerPage;
    DbFile file = Database.getCatalog().getDbFile(tableid);
    tupleDesc = file.getTupleDesc();
    numPages = ((HeapFile) file).numPages();
    numTuples = 0;

    int numFields = tupleDesc.numFields();

    // Classify every column once, so the per-tuple loop below does not re-query the schema.
    boolean[] isIntColumn = new boolean[numFields];
    statistics = new ArrayList<Object>();
    for (int i = 0; i < numFields; i++) {
      isIntColumn[i] = Type.INT_TYPE.equals(tupleDesc.getFieldType(i));
      if (isIntColumn[i]) {
        statistics.add(new IntStatistics(NUM_HIST_BINS));
      } else {
        statistics.add(new StringHistogram(NUM_HIST_BINS));
      }
    }

    // NOTE(review): null is passed where the iterator presumably expects a transaction id —
    // confirm that the file's iterator tolerates this.
    final DbFileIterator iter = file.iterator(null);
    try {
      iter.open();
      try {
        while (iter.hasNext()) {
          Tuple t = iter.next();
          numTuples++;

          for (int i = 0; i < numFields; i++) {
            if (isIntColumn[i]) {
              ((IntStatistics) statistics.get(i)).addValue(((IntField) t.getField(i)).getValue());
            } else {
              ((StringHistogram) statistics.get(i))
                  .addValue(((StringField) t.getField(i)).getValue());
            }
          }
        }
      } finally {
        // Close the iterator even when the scan aborts part-way through.
        iter.close();
      }
    } catch (DbException e) {
      e.printStackTrace();
    } catch (TransactionAbortedException e) {
      e.printStackTrace();
    }
  }
 /**
  * Rebuilds {@code td} from the catalog schema of {@code tableid}, keeping the field types and
  * renaming every field to {@code alias + "." + fieldName}. A null field name is rendered as the
  * text "null".
  *
  * <p>Side effect: when {@code alias} is null and the table has at least one field, {@code alias}
  * itself is overwritten with the string "null".
  */
 private void createAliasedTd() {
   TupleDesc catalogTd = Database.getCatalog().getTupleDesc(tableid);
   int fieldCount = catalogTd.numFields();
   String[] qualifiedNames = new String[fieldCount];
   Type[] fieldTypes = new Type[fieldCount];

   for (int idx = 0; idx < fieldCount; idx++) {
     String rawName = catalogTd.getFieldName(idx);
     if (alias == null) {
       alias = "null";
     }
     String shownName = (rawName == null) ? "null" : rawName;
     qualifiedNames[idx] = alias + "." + shownName;
     fieldTypes[idx] = catalogTd.getFieldType(idx);
   }
   td = new TupleDesc(fieldTypes, qualifiedNames);
 }
  /**
   * Builds a HeapPage from the raw bytes of a page as read from disk. A page image consists of
   * header bytes that mark which slots are in use, followed by the tuple slots themselves. The
   * number of tuple slots is:
   *
   * <p>floor((BufferPool.PAGE_SIZE*8) / (tuple size * 8 + 1))
   *
   * <p>where tuple size is the size of a tuple of this table, obtainable through {@link
   * Catalog#getTupleDesc}. The number of 8-bit header words is:
   *
   * <p>ceiling(no. tuple slots / 8)
   *
   * <p>After parsing, the constructor records the page's before-image via {@code
   * setBeforeImage()}.
   *
   * @param id the id of this page; its table id selects the schema used to parse tuples
   * @param data the raw page contents
   * @throws IOException if reading from the in-memory stream fails
   * @see Database#getCatalog
   * @see Catalog#getTupleDesc
   * @see BufferPool#PAGE_SIZE
   */
  public HeapPage(HeapPageId id, byte[] data) throws IOException {
    this.pid = id;
    this.td = Database.getCatalog().getTupleDesc(id.getTableId());
    this.numSlots = getNumTuples();
    DataInputStream in = new DataInputStream(new ByteArrayInputStream(data));

    // The header bytes come first in the page image.
    header = new byte[getHeaderSize()];
    int headerIdx = 0;
    while (headerIdx < header.length) {
      header[headerIdx] = in.readByte();
      headerIdx++;
    }

    try {
      // The tuple slots follow the header, one per slot index.
      tuples = new Tuple[numSlots];
      for (int slot = 0; slot < tuples.length; slot++) {
        tuples[slot] = readNextTuple(in, slot);
      }
    } catch (NoSuchElementException e) {
      e.printStackTrace();
    }
    in.close();

    setBeforeImage();
  }
  /**
   * Builds a HeapPage from the raw bytes of a page as read from disk. The page image starts with a
   * header, parsed here by the {@code Header} constructor, followed by the tuple slots. The number
   * of tuple slots is computed as:
   *
   * <p>(BufferPool.PAGE_SIZE * 8) / ((tuple size * 8) + 1)
   *
   * <p>using integer division, where tuple size is the size reported by the table's TupleDesc,
   * obtainable through {@link Catalog#getTupleDesc}.
   *
   * @param id the id of this page; its table id selects the schema used to parse tuples
   * @param data the raw page contents
   * @throws IOException if reading from the in-memory stream fails
   * @see Database#getCatalog
   * @see Catalog#getTupleDesc
   * @see BufferPool#PAGE_SIZE
   */
  public HeapPage(HeapPageId id, byte[] data) throws IOException {
    this.pid = id;
    this.td = Database.getCatalog().getTupleDesc(id.tableid());

    // Each slot costs its tuple's bits plus one extra bit.
    final int bitsPerPage = BufferPool.PAGE_SIZE * 8;
    final int bitsPerSlot = (td.getSize() * 8) + 1;
    this.numSlots = bitsPerPage / bitsPerSlot;

    DataInputStream in = new DataInputStream(new ByteArrayInputStream(data));

    // The header is consumed from the stream before any tuple data.
    header = new Header(in);

    try {
      tuples = new Tuple[numSlots];
      int slot = 0;
      while (slot < numSlots) {
        tuples[slot] = readNextTuple(in, slot);
        slot++;
      }
    } catch (NoSuchElementException ignored) {
      // The exception is swallowed here; slots that were not read yet stay null.
    }

    in.close();
  }
Beispiel #8
0
  /**
   * Produces the schema of this scan: the catalog's TupleDesc for the scanned table, with every
   * field name prefixed by the table alias given to the constructor. The prefix keeps names
   * distinct when joining tables that contain fields with the same name.
   *
   * <p>A null alias and a null field name are each rendered as the text "null", so names such as
   * "null.null" are possible.
   *
   * @return a new TupleDesc with the catalog's field types and alias-prefixed field names.
   */
  public TupleDesc getTupleDesc() {
    final TupleDesc base = Database.getCatalog().getTupleDesc(tableid);
    final Iterator<TDItem> items = base.iterator();
    final int fieldCount = base.numFields();
    final Type[] types = new Type[fieldCount];
    final String[] names = new String[fieldCount];

    final String prefix = (this.tableAlias == null) ? "null" : this.tableAlias;

    // Walk the schema items in iterator order, filling both arrays at the same position.
    for (int pos = 0; items.hasNext(); pos++) {
      final TDItem item = items.next();
      final String rawName = item.fieldName;
      types[pos] = item.fieldType;
      names[pos] = prefix + "." + (rawName == null ? "null" : rawName);
    }
    return new TupleDesc(types, names);
  }
Beispiel #9
0
  /**
   * This is a helper method that computes the cost and cardinality of joining joinToRemove to
   * joinSet (joinSet should contain joinToRemove), given that all of the subsets of size
   * joinSet.size() - 1 have already been computed and stored in PlanCache pc.
   *
   * <p>The method removes joinToRemove from a copy of joinSet, takes the cached best plan for the
   * remaining joins (or treats both sides as base tables when nothing remains), estimates the join
   * cost in both inner/outer orientations, and keeps the cheaper one.
   *
   * @param stats table stats for all of the tables, referenced by table names rather than alias
   *     (see {@link #orderJoins})
   * @param filterSelectivities the selectivities of the filters over each of the tables (where
   *     tables are identified by their alias or name if no alias is given)
   * @param joinToRemove the join to remove from joinSet
   * @param joinSet the set of joins being considered; it is cast to HashSet in order to be cloned
   * @param bestCostSoFar the best way to join joinSet so far (minimum of previous invocations of
   *     computeCostAndCardOfSubplan for this joinSet, from returned CostCard)
   * @param pc the PlanCache for this join; should have subplans for all plans of size
   *     joinSet.size()-1
   * @return A {@link CostCard} object describing the cost, cardinality, optimal subplan; or null
   *     when no order is cached for the remaining joins, when neither table of the join appears in
   *     the cached subplan, or when the estimated cost is not below bestCostSoFar
   * @throws ParsingException when stats, filterSelectivities, or pc object is missing tables
   *     involved in join
   */
  @SuppressWarnings("unchecked")
  private CostCard computeCostAndCardOfSubplan(
      HashMap<String, TableStats> stats,
      HashMap<String, Double> filterSelectivities,
      LogicalJoinNode joinToRemove,
      Set<LogicalJoinNode> joinSet,
      double bestCostSoFar,
      PlanCache pc)
      throws ParsingException {

    LogicalJoinNode j = joinToRemove;

    Vector<LogicalJoinNode> prevBest;

    // Both aliases must resolve to a table id.
    // NOTE(review): if getTableId(null) returns null, the "t2Alias == null" branches further down
    // can never be reached because the second check throws first — confirm.
    if (this.p.getTableId(j.t1Alias) == null)
      throw new ParsingException("Unknown table " + j.t1Alias);
    if (this.p.getTableId(j.t2Alias) == null)
      throw new ParsingException("Unknown table " + j.t2Alias);

    // stats is keyed by catalog table name, filterSelectivities by alias.
    String table1Name = Database.getCatalog().getTableName(this.p.getTableId(j.t1Alias));
    String table2Name = Database.getCatalog().getTableName(this.p.getTableId(j.t2Alias));
    String table1Alias = j.t1Alias;
    String table2Alias = j.t2Alias;

    // Work on a copy so the caller's joinSet is left untouched.
    Set<LogicalJoinNode> news = (Set<LogicalJoinNode>) ((HashSet<LogicalJoinNode>) joinSet).clone();
    news.remove(j);

    double t1cost, t2cost;
    int t1card, t2card;
    boolean leftPkey, rightPkey;

    if (news.isEmpty()) { // base case -- both are base relations
      prevBest = new Vector<LogicalJoinNode>();
      t1cost = stats.get(table1Name).estimateScanCost();
      t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias));
      leftPkey = isPkey(j.t1Alias, j.f1PureName);

      // A null right alias contributes zero cost and cardinality and no primary key.
      t2cost = table2Alias == null ? 0 : stats.get(table2Name).estimateScanCost();
      t2card =
          table2Alias == null
              ? 0
              : stats.get(table2Name).estimateTableCardinality(filterSelectivities.get(j.t2Alias));
      rightPkey = table2Alias == null ? false : isPkey(table2Alias, j.f2PureName);
    } else {
      // news is not empty -- figure best way to join j to news
      prevBest = pc.getOrder(news);

      // possible that we have not cached an answer, if subset
      // includes a cross product
      if (prevBest == null) {
        return null;
      }

      double prevBestCost = pc.getCost(news);
      int bestCard = pc.getCard(news);

      // Decide which side of j is the cached subplan and which is a single base table.
      if (doesJoin(prevBest, table1Alias)) { // j.t1 is in prevBest
        t1cost = prevBestCost; // left side costs whatever the cached subplan costs
        t1card = bestCard;
        leftPkey = hasPkey(prevBest);

        t2cost = j.t2Alias == null ? 0 : stats.get(table2Name).estimateScanCost();
        t2card =
            j.t2Alias == null
                ? 0
                : stats
                    .get(table2Name)
                    .estimateTableCardinality(filterSelectivities.get(j.t2Alias));
        rightPkey = j.t2Alias == null ? false : isPkey(j.t2Alias, j.f2PureName);
      } else if (doesJoin(prevBest, j.t2Alias)) { // j.t2 is in prevBest
        // (else-if because both sides should not be in prevBest at once)
        t2cost = prevBestCost; // right side costs whatever the cached subplan costs
        t2card = bestCard;
        rightPkey = hasPkey(prevBest);

        t1cost = stats.get(table1Name).estimateScanCost();
        t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias));
        leftPkey = isPkey(j.t1Alias, j.f1PureName);

      } else {
        // don't consider this plan if one of j.t1 or j.t2
        // isn't a table joined in prevBest (cross product)
        return null;
      }
    }

    // Cost of the join in its given orientation.
    double cost1 = estimateJoinCost(j, t1card, t2card, t1cost, t2cost);

    // Cost with inner and outer swapped; adopt it when cheaper, swapping the pkey flags along
    // with the join node.
    LogicalJoinNode j2 = j.swapInnerOuter();
    double cost2 = estimateJoinCost(j2, t2card, t1card, t2cost, t1cost);
    if (cost2 < cost1) {
      boolean tmp;
      j = j2;
      cost1 = cost2;
      tmp = rightPkey;
      rightPkey = leftPkey;
      leftPkey = tmp;
    }
    // Prune: only a strictly cheaper plan than the best one so far is reported.
    if (cost1 >= bestCostSoFar) return null;

    CostCard cc = new CostCard();

    cc.card = estimateJoinCardinality(j, t1card, t2card, leftPkey, rightPkey, stats);
    cc.cost = cost1;
    // Copy the cached order so appending the new join does not modify the cached Vector.
    cc.plan = (Vector<LogicalJoinNode>) prevBest.clone();
    cc.plan.addElement(j); // prevbest is left -- add new join to end
    return cc;
  }
Beispiel #10
0
 /**
  * Convenience constructor that delegates to the three-argument constructor, passing the table's
  * catalog name as the third argument (presumably the table alias — confirm against that
  * constructor).
  *
  * @param tid the transaction id forwarded to the three-argument constructor
  * @param tableid the id of the table to scan; also used to look up the table name in the catalog
  */
 public SeqScan(TransactionId tid, int tableid) {
   this(tid, tableid, Database.getCatalog().getTableName(tableid));
 }
Beispiel #11
0
 /**
  * Looks up the name of the scanned table in the database catalog.
  *
  * @return the table name the catalog reports for this operator's table id, i.e. the actual name
  *     of the table in the catalog of the database
  */
 public String getTableName() {
   final String catalogName = Database.getCatalog().getTableName(id);
   return catalogName;
 }
Beispiel #12
0
 /**
  * Creates a sequential scan over the specified table as a part of the specified transaction. The
  * constructor stores the alias and table id and obtains an iterator over the table's database
  * file from the catalog.
  *
  * @param tid The transaction this scan is running as a part of.
  * @param tableid the table to scan.
  * @param tableAlias the alias of this table (needed by the parser); the returned tupleDesc should
  *     have fields with name tableAlias.fieldName (note: this class is not responsible for
  *     handling a case where tableAlias or fieldName are null. It shouldn't crash if they are, but
  *     the resulting name can be null.fieldName, tableAlias.null, or null.null).
  */
 public SeqScan(TransactionId tid, int tableid, String tableAlias) {
   this.alias = tableAlias;
   this.id = tableid;
   // The iterator is created here but not opened.
   this.dbiter = Database.getCatalog().getDatabaseFile(tableid).iterator(tid);
 }
Beispiel #13
0
 /**
  * Static method to generate a byte array corresponding to an empty HeapPage. Used to add new,
  * empty pages to the file. Passing the result of this method to the HeapPage constructor will
  * create a HeapPage with no valid tuples in it.
  *
  * @param tableid The id of the table that this empty page will belong to.
  * @return a zero-filled byte array of length {@code BufferPool.PAGE_SIZE}.
  */
 public static byte[] createEmptyPageData(int tableid) {
   // The schema is looked up but its result does not influence the page size.
   Database.getCatalog().getTupleDesc(tableid);
   return new byte[BufferPool.PAGE_SIZE]; // a freshly allocated array is all zeros
 }
Beispiel #14
0
  /**
   * This is a helper method that computes the cost and cardinality of joining a LogicalJoinNode j
   * to the current greedy plan we have built up.
   *
   * <p>Depending on whether the left table, the right table, or neither is already part of the
   * plan, the corresponding side takes its cost and cardinality from the plan (via getCostCard)
   * and the other side is costed as a base-table scan. The join is then costed in both
   * inner/outer orientations and the cheaper orientation is used.
   *
   * @param j the join to try adding to our plan
   * @param plan the current plan we have built so far from the greedy algorithm, a Vector of
   *     LogicalJoinNodes that we've so far chosen.
   * @param planCardinalities given the join order from plan, we also keep track of how large joined
   *     tables are, so we can help estimate the cardinality and cost of this next join
   * @param planCosts given the join order from plan, we also keep track of how expensive executing
   *     some joins are, so we can help estimate the cardinality and cost of this next join
   * @param stats table stats for all of the tables, referenced by table names rather than alias
   *     (see {@link #orderGreedyJoins(HashMap, HashMap)})
   * @param filterSelectivities the selectivities of the filters over each of the tables (where
   *     tables are identified by their alias or name if no alias is given)
   * @return A {@link CostCard} object with the estimated cost and cardinality set; its plan field
   *     is not assigned by this method
   * @throws ParsingException when either alias of the join does not resolve to a table id
   */
  private CostCard costGreedyJoin(
      LogicalJoinNode j,
      Vector<LogicalJoinNode> plan,
      Vector<Integer> planCardinalities,
      Vector<Double> planCosts,
      HashMap<String, TableStats> stats,
      HashMap<String, Double> filterSelectivities)
      throws ParsingException {

    // Both aliases must resolve to a table id.
    // NOTE(review): if getTableId(null) returns null, the "t2Alias == null" branches further down
    // can never be reached because the second check throws first — confirm.
    if (this.p.getTableId(j.t1Alias) == null)
      throw new ParsingException("Unknown table " + j.t1Alias);
    if (this.p.getTableId(j.t2Alias) == null)
      throw new ParsingException("Unknown table " + j.t2Alias);

    // stats is keyed by catalog table name, filterSelectivities by alias.
    String table1Name = Database.getCatalog().getTableName(this.p.getTableId(j.t1Alias));
    String table2Name = Database.getCatalog().getTableName(this.p.getTableId(j.t2Alias));
    String table1Alias = j.t1Alias;
    String table2Alias = j.t2Alias;

    double t1cost, t2cost;
    int t1card, t2card;
    boolean leftPkey, rightPkey;

    // Determine cost, cardinality and primary-key flag for each side of the join.
    if (doesJoin(plan, table1Alias)) { // j.t1 is in plan already
      CostCard c = getCostCard(plan, planCardinalities, planCosts, table1Alias);
      t1cost = c.cost; // left side just has cost of whatever left subtree is
      t1card = c.card;
      leftPkey = hasPkey(plan);

      // A null right alias contributes zero cost and cardinality and no primary key.
      t2cost = j.t2Alias == null ? 0 : stats.get(table2Name).estimateScanCost();
      t2card =
          j.t2Alias == null
              ? 0
              : stats.get(table2Name).estimateTableCardinality(filterSelectivities.get(j.t2Alias));
      rightPkey = j.t2Alias == null ? false : isPkey(j.t2Alias, j.f2PureName);
    } else if (doesJoin(plan, j.t2Alias)) { // j.t2 is in plan
      // (else if since both j.t1 and j.t2 shouldn't both be)
      CostCard c = getCostCard(plan, planCardinalities, planCosts, table2Alias);
      t2cost = c.cost; // right side has the cost of whatever the existing plan costs
      t2card = c.card;
      rightPkey = hasPkey(plan);

      t1cost = stats.get(table1Name).estimateScanCost();
      t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias));
      leftPkey = isPkey(j.t1Alias, j.f1PureName);

    } else { // Neither is a plan, both are just single tables
      t1cost = stats.get(table1Name).estimateScanCost();
      t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias));
      leftPkey = isPkey(j.t1Alias, j.f1PureName);

      t2cost = table2Alias == null ? 0 : stats.get(table2Name).estimateScanCost();
      t2card =
          table2Alias == null
              ? 0
              : stats.get(table2Name).estimateTableCardinality(filterSelectivities.get(j.t2Alias));
      rightPkey = table2Alias == null ? false : isPkey(table2Alias, j.f2PureName);
    }

    // Cost of the join in its given orientation.
    double cost1 = estimateJoinCost(j, t1card, t2card, t1cost, t2cost);

    // Cost with inner and outer swapped; adopt it when cheaper, swapping the pkey flags along
    // with the join node.
    LogicalJoinNode j2 = j.swapInnerOuter();
    double cost2 = estimateJoinCost(j2, t2card, t1card, t2cost, t1cost);
    if (cost2 < cost1) {
      boolean tmp;
      j = j2;
      cost1 = cost2;
      tmp = rightPkey;
      rightPkey = leftPkey;
      leftPkey = tmp;
    }

    CostCard cc = new CostCard();
    cc.card = estimateJoinCardinality(j, t1card, t2card, leftPkey, rightPkey, stats);
    cc.cost = cost1;
    return cc;
  }
Beispiel #15
0
  /**
   * Return true if field is a primary key of the specified table, false otherwise.
   *
   * <p>The check is null-safe: it returns false rather than throwing a NullPointerException when
   * the alias does not resolve to a table id or when the catalog reports no primary key for the
   * table.
   *
   * @param tableAlias The alias of the table in the query
   * @param field The pure name of the field
   * @return true only when the catalog's primary key for the table equals {@code field}
   */
  private boolean isPkey(String tableAlias, String field) {
    // Held as Integer so an unresolved alias is detected instead of failing on unboxing.
    Integer tableId = p.getTableId(tableAlias);
    if (tableId == null) {
      return false;
    }
    String primaryKey = Database.getCatalog().getPrimaryKey(tableId);

    return primaryKey != null && primaryKey.equals(field);
  }
Beispiel #16
0
  /**
   * Helper function to display a Swing window with a tree representation of the specified list of
   * joins. See {@link #orderJoins}, which may want to call this when the analyze flag is true.
   *
   * <p>The joins are processed in order. Each join becomes a tree node labelled with the cached
   * cost and cardinality of the joins seen so far; its children are either fresh leaf nodes for
   * tables not seen before or the subtree previously built for that table. The node built for the
   * last join is used as the root of the displayed tree. The join list and the growing path are
   * also printed to standard output.
   *
   * @param js the join plan to visualize
   * @param pc the PlanCache accumulated while building the optimal plan
   * @param stats table statistics for base tables
   * @param selectivities the selectivities of the filters over each of the tables (where tables are
   *     identified by their alias or name if no alias is given)
   */
  private void printJoins(
      Vector<LogicalJoinNode> js,
      PlanCache pc,
      HashMap<String, TableStats> stats,
      HashMap<String, Double> selectivities) {

    JFrame f = new JFrame("Join Plan for " + p.getQuery());

    // Closing the window disposes only this frame.
    f.setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE);

    // NOTE(review): the frame is shown before any component is added; the pack() call at the end
    // lays the contents out afterwards.
    f.setVisible(true);

    f.setSize(300, 500);

    // Maps a table alias to the tree node that currently stands for the subtree containing it.
    HashMap<String, DefaultMutableTreeNode> m = new HashMap<String, DefaultMutableTreeNode>();

    // int numTabs = 0;

    // int k;
    DefaultMutableTreeNode root = null, treetop = null;
    // Joins processed so far; used as the key for the cost/cardinality lookups in pc.
    HashSet<LogicalJoinNode> pathSoFar = new HashSet<LogicalJoinNode>();
    // True while neither table of the current join has been seen in an earlier join.
    boolean neither;

    System.out.println(js);
    for (LogicalJoinNode j : js) {
      pathSoFar.add(j);
      System.out.println("PATH SO FAR = " + pathSoFar);

      String table1Name = Database.getCatalog().getTableName(this.p.getTableId(j.t1Alias));
      String table2Name = Database.getCatalog().getTableName(this.p.getTableId(j.t2Alias));

      // Double c = pc.getCost(pathSoFar);
      neither = true;

      // Node for this join, labelled with the cached cost and cardinality of the path so far.
      root =
          new DefaultMutableTreeNode(
              "Join "
                  + j
                  + " (Cost ="
                  + pc.getCost(pathSoFar)
                  + ", card = "
                  + pc.getCard(pathSoFar)
                  + ")");
      DefaultMutableTreeNode n = m.get(j.t1Alias);
      if (n == null) { // never seen this table before
        // Leaf for the left base table, labelled with its scan cost and estimated cardinality.
        n =
            new DefaultMutableTreeNode(
                j.t1Alias
                    + " (Cost = "
                    + stats.get(table1Name).estimateScanCost()
                    + ", card = "
                    + stats.get(table1Name).estimateTableCardinality(selectivities.get(j.t1Alias))
                    + ")");
        root.add(n);
      } else {
        // make left child root n
        root.add(n);
        neither = false;
      }
      m.put(j.t1Alias, root);

      n = m.get(j.t2Alias);
      if (n == null) { // never seen this table before

        // Leaf for the right side; a null alias is shown as the placeholder text "Subplan".
        n =
            new DefaultMutableTreeNode(
                j.t2Alias == null
                    ? "Subplan"
                    : (j.t2Alias
                        + " (Cost = "
                        + stats.get(table2Name).estimateScanCost()
                        + ", card = "
                        + stats
                            .get(table2Name)
                            .estimateTableCardinality(selectivities.get(j.t2Alias))
                        + ")"));
        root.add(n);
      } else {
        // make right child root n
        root.add(n);
        neither = false;
      }
      m.put(j.t2Alias, root);

      // unless this table doesn't join with other tables,
      // all tables are accessed from root
      if (!neither) {
        // Only values of existing keys are replaced here; no key is added or removed while
        // iterating over the key set.
        for (String key : m.keySet()) {
          m.put(key, root);
        }
      }

      treetop = root;
    }

    // treetop is the node of the last join, or null when js is empty.
    JTree tree = new JTree(treetop);
    JScrollPane treeView = new JScrollPane(tree);

    tree.setShowsRootHandles(true);

    // Use the join image for the open and closed icons of the renderer; the leaf icon is left at
    // its default.
    ImageIcon leafIcon = new ImageIcon("join.jpg");
    DefaultTreeCellRenderer renderer = new DefaultTreeCellRenderer();
    renderer.setOpenIcon(leafIcon);
    renderer.setClosedIcon(leafIcon);

    tree.setCellRenderer(renderer);

    f.setSize(300, 500);

    f.add(treeView);
    // Expand every row; getRowCount() is re-read on each pass, so rows revealed by an expansion
    // are visited as well.
    for (int i = 0; i < tree.getRowCount(); i++) {
      tree.expandRow(i);
    }

    if (js.size() == 0) {
      f.add(new JLabel("No joins in plan."));
    }

    f.pack();
  }
Beispiel #17
0
 /**
  * Opens this scan: fetches the DbFile registered for {@code tableid} from the catalog, asks it
  * for an iterator bound to transaction {@code tid}, stores that iterator in {@code fileIt}, and
  * opens it.
  *
  * @throws DbException propagated from the iterator's {@code open()}
  * @throws TransactionAbortedException propagated from the iterator's {@code open()}
  */
 public void open() throws DbException, TransactionAbortedException {
   // A new iterator is requested from the file on every call.
   fileIt = Database.getCatalog().getDbFile(tableid).iterator(tid);
   fileIt.open();
 }