/**
 * Builds a {@code TableStats} for every table id the catalog's iterator yields and registers it
 * via {@code setTableStats} under that table's catalog name. Prints a progress line before the
 * first table and after the last one.
 */
public static void computeStatistics() {
  final Iterator<Integer> pendingIds = Database.getCatalog().tableIdIterator();

  System.out.println("Computing table stats.");
  while (pendingIds.hasNext()) {
    final int currentId = pendingIds.next();
    // Build the statistics first, then look the name up, so calls happen in the same order
    // as before.
    final TableStats computed = new TableStats(currentId, IOCOSTPERPAGE);
    setTableStats(Database.getCatalog().getTableName(currentId), computed);
  }
  System.out.println("Done.");
}
/**
 * Builds a fresh TupleDesc for this scan: the field types are copied from the catalog's
 * descriptor for the scanned table, and every field name is rewritten to {@code alias.fieldName}.
 * The prefix disambiguates same-named fields when tables are joined.
 *
 * @return a new TupleDesc with the catalog's field types and alias-qualified field names
 */
public TupleDesc getTupleDesc() {
  final TupleDesc base = Database.getCatalog().getTupleDesc(id);
  final int fieldCount = base.numFields();
  final String[] qualifiedNames = new String[fieldCount];
  final Type[] fieldTypes = new Type[fieldCount];

  // String concatenation renders a null alias or a null field name as the text "null".
  final String prefix = alias + ".";
  int idx = 0;
  while (idx < fieldCount) {
    fieldTypes[idx] = base.getFieldType(idx);
    qualifiedNames[idx] = prefix + base.getFieldName(idx);
    idx++;
  }
  return new TupleDesc(fieldTypes, qualifiedNames);
}
public void open() throws DbException, TransactionAbortedException { // some code goes here if (i_pos != null) { i = i_pos; } else { Catalog gc = Database.getCatalog(); HeapFile file = (HeapFile) gc.getDbFile(tableid); i = file.iterator(tid); } i.open(); }
/** * Create a new TableStats object, that keeps track of statistics on each column of a table * * @param tableid The table over which to compute statistics * @param ioCostPerPage The cost per page of IO. This doesn't differentiate between * sequential-scan IO and disk seeks. */ public TableStats(int tableid, int ioCostPerPage) { // For this function, we use the DbFile for the table in question, // then scan through its tuples and calculate the values that you // to build the histograms. // TODO: Fill out the rest of the constructor. // Feel free to change anything already written, it's only a guideline this.ioCostPerPage = ioCostPerPage; DbFile file = Database.getCatalog().getDbFile(tableid); tupleDesc = file.getTupleDesc(); numPages = ((HeapFile) file).numPages(); numTuples = 0; int numFields = tupleDesc.numFields(); // TODO: what goes here? statistics = new ArrayList<Object>(); for (int i = 0; i < numFields; i++) { if (Type.INT_TYPE.equals(tupleDesc.getFieldType(i))) { statistics.add(new IntStatistics(NUM_HIST_BINS)); } else { statistics.add(new StringHistogram(NUM_HIST_BINS)); } } final DbFileIterator iter = file.iterator(null); try { iter.open(); while (iter.hasNext()) { Tuple t = iter.next(); numTuples++; // TODO: and here? for (int i = 0; i < numFields; i++) { if (Type.INT_TYPE.equals(tupleDesc.getFieldType(i))) { ((IntStatistics) statistics.get(i)).addValue(((IntField) t.getField(i)).getValue()); } else { ((StringHistogram) statistics.get(i)) .addValue(((StringField) t.getField(i)).getValue()); } } } iter.close(); } catch (DbException e) { e.printStackTrace(); } catch (TransactionAbortedException e) { e.printStackTrace(); } }
/**
 * Rebuilds {@code td} from the catalog's descriptor for {@code tableid}, keeping the field types
 * and renaming each field to {@code alias.fieldName}.
 *
 * <p>Side effect: if {@code alias} is null and the table has at least one field, {@code alias}
 * is overwritten with the string {@code "null"}.
 */
private void createAliasedTd() {
  final TupleDesc source = Database.getCatalog().getTupleDesc(tableid);
  final String[] qualifiedNames = new String[source.numFields()];
  final Type[] fieldTypes = new Type[source.numFields()];

  for (int pos = 0; pos < qualifiedNames.length; pos++) {
    final String rawName = source.getFieldName(pos);
    if (alias == null) {
      alias = "null";
    }
    // A missing field name is rendered as the text "null".
    qualifiedNames[pos] = alias + "." + (rawName == null ? "null" : rawName);
    fieldTypes[pos] = source.getFieldType(pos);
  }
  td = new TupleDesc(fieldTypes, qualifiedNames);
}
/**
 * Create a HeapPage from a set of bytes of data read from disk. The format of a HeapPage is a set
 * of header bytes indicating the slots of the page that are in use, followed by some number of
 * tuple slots. Specifically, the number of tuples is equal to:
 *
 * <p>floor((BufferPool.PAGE_SIZE*8) / (tuple size * 8 + 1))
 *
 * <p>where tuple size is the size of tuples in this database table, which can be determined via
 * {@link Catalog#getTupleDesc}. The number of 8-bit header words is equal to:
 *
 * <p>ceiling(no. tuple slots / 8)
 *
 * <p>The header is read first ({@code getHeaderSize()} bytes), then one tuple per slot via
 * {@code readNextTuple}. A {@code NoSuchElementException} raised while reading tuples is printed
 * and ends the tuple loop; the slots not yet read stay null. The input stream is closed on every
 * path, after which {@code setBeforeImage()} is called.
 *
 * @param id the id of this page; its table id selects the tuple descriptor from the catalog
 * @param data the raw page bytes
 * @throws IOException if the header or a tuple cannot be read from {@code data} (for example
 *     because it is shorter than the header)
 * @see Database#getCatalog
 * @see Catalog#getTupleDesc
 * @see BufferPool#PAGE_SIZE
 */
public HeapPage(HeapPageId id, byte[] data) throws IOException {
  this.pid = id;
  this.td = Database.getCatalog().getTupleDesc(id.getTableId());
  this.numSlots = getNumTuples();

  // Allocate both arrays up front so they are assigned on every path through the constructor.
  header = new byte[getHeaderSize()];
  tuples = new Tuple[numSlots];

  try (DataInputStream dis = new DataInputStream(new ByteArrayInputStream(data))) {
    // readFully fills the whole header or throws EOFException, like the per-byte loop did.
    dis.readFully(header);

    try {
      for (int i = 0; i < tuples.length; i++) {
        tuples[i] = readNextTuple(dis, i);
      }
    } catch (NoSuchElementException e) {
      e.printStackTrace();
    }
  }
  setBeforeImage();
}
/**
 * Create a HeapPage from a set of bytes of data read from disk. The page starts with a header,
 * parsed by {@code Header} from the beginning of the data, followed by the tuple slots. The
 * number of tuple slots is:
 *
 * <p>floor((BufferPool.PAGE_SIZE * 8) / (tuple size * 8 + 1))
 *
 * <p>where tuple size is the size of tuples in this database table, which can be determined via
 * {@link Catalog#getTupleDesc}. (The {@code + 1} presumably accounts for one header bit per slot
 * — confirm against {@code Header}.)
 *
 * <p>A {@code NoSuchElementException} raised while reading tuples silently ends the tuple loop;
 * the slots not yet read stay null. The input stream is closed on every path.
 *
 * @param id the id of this page; its table id selects the tuple descriptor from the catalog
 * @param data the raw page bytes
 * @throws IOException if the header or a tuple cannot be read from {@code data}
 * @see Database#getCatalog
 * @see Catalog#getTupleDesc
 * @see BufferPool#PAGE_SIZE
 */
public HeapPage(HeapPageId id, byte[] data) throws IOException {
  this.pid = id;
  this.td = Database.getCatalog().getTupleDesc(id.tableid());
  this.numSlots = (BufferPool.PAGE_SIZE * 8) / ((td.getSize() * 8) + 1);

  // Allocated before the stream is opened so the field is assigned on every path.
  tuples = new Tuple[numSlots];

  try (DataInputStream dis = new DataInputStream(new ByteArrayInputStream(data))) {
    // The header occupies the front of the stream and must be consumed before any tuple.
    header = new Header(dis);

    try {
      for (int i = 0; i < numSlots; i++) {
        tuples[i] = readNextTuple(dis, i);
      }
    } catch (NoSuchElementException ignored) {
      // Deliberately best-effort, as before: keep the tuples read so far and leave the
      // remaining slots null.
    }
  }
}
/** * Returns the TupleDesc with field names from the underlying HeapFile, prefixed with the * tableAlias string from the constructor. This prefix becomes useful when joining tables * containing a field(s) with the same name. * * @return the TupleDesc with field names from the underlying HeapFile, prefixed with the * tableAlias string from the constructor. */ public TupleDesc getTupleDesc() { // some code goes here TupleDesc td = Database.getCatalog().getTupleDesc(tableid); Iterator<TDItem> tdIter = td.iterator(); int size = td.numFields(); Type[] typeAr = new Type[size]; String[] fieldAr = new String[size]; String aliasString = this.tableAlias; TDItem item; Type fieldType; String fieldName; int count = 0; if (aliasString == null) { aliasString = "null"; } // for (int i = 0; i < size; i++){ // item = tdIter.next(); // fieldType = item.fieldType; // fieldName = item.fieldName; while (tdIter.hasNext()) { item = tdIter.next(); fieldType = item.fieldType; fieldName = item.fieldName; if (fieldName == null) { fieldName = "null"; } typeAr[count] = fieldType; fieldAr[count] = aliasString + "." + fieldName; // "null.null case may occur" count++; } return new TupleDesc(typeAr, fieldAr); }
/**
 * This is a helper method that computes the cost and cardinality of joining joinToRemove to
 * joinSet (joinSet should contain joinToRemove), given that all of the subsets of size
 * joinSet.size() - 1 have already been computed and stored in PlanCache pc.
 *
 * <p>Both join orientations (j and j.swapInnerOuter()) are costed and the cheaper one is kept.
 *
 * @param stats table stats for all of the tables, referenced by table names rather than alias
 *     (see {@link #orderJoins})
 * @param filterSelectivities the selectivities of the filters over each of the tables (where
 *     tables are identified by their alias or name if no alias is given)
 * @param joinToRemove the join to remove from joinSet
 * @param joinSet the set of joins being considered; it is cast to HashSet in order to clone it,
 *     so it must actually be a HashSet
 * @param bestCostSoFar the best way to join joinSet so far (minimum of previous invocations of
 *     computeCostAndCardOfSubplan for this joinSet, from returned CostCard)
 * @param pc the PlanCache for this join; should have subplans for all plans of size
 *     joinSet.size()-1
 * @return A {@link CostCard} object describing the cost, cardinality, optimal subplan; or null
 *     when pc has no cached order for the remaining joins, when neither table of joinToRemove
 *     joins with the cached order, or when the resulting cost is not below bestCostSoFar
 * @throws ParsingException when either table alias of the join is unknown to the plan
 */
@SuppressWarnings("unchecked")
private CostCard computeCostAndCardOfSubplan(
    HashMap<String, TableStats> stats,
    HashMap<String, Double> filterSelectivities,
    LogicalJoinNode joinToRemove,
    Set<LogicalJoinNode> joinSet,
    double bestCostSoFar,
    PlanCache pc)
    throws ParsingException {
  LogicalJoinNode j = joinToRemove;

  Vector<LogicalJoinNode> prevBest;

  // Both aliases must resolve to a table id before anything is looked up by name.
  if (this.p.getTableId(j.t1Alias) == null)
    throw new ParsingException("Unknown table " + j.t1Alias);
  if (this.p.getTableId(j.t2Alias) == null)
    throw new ParsingException("Unknown table " + j.t2Alias);

  String table1Name = Database.getCatalog().getTableName(this.p.getTableId(j.t1Alias));
  String table2Name = Database.getCatalog().getTableName(this.p.getTableId(j.t2Alias));
  String table1Alias = j.t1Alias;
  String table2Alias = j.t2Alias;

  // Work on a copy of joinSet with j removed; the caller's set is left untouched.
  Set<LogicalJoinNode> news = (Set<LogicalJoinNode>) ((HashSet<LogicalJoinNode>) joinSet).clone();
  news.remove(j);

  double t1cost, t2cost;
  int t1card, t2card;
  boolean leftPkey, rightPkey;

  if (news.isEmpty()) { // base case -- both are base relations
    prevBest = new Vector<LogicalJoinNode>();
    t1cost = stats.get(table1Name).estimateScanCost();
    t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias));
    leftPkey = isPkey(j.t1Alias, j.f1PureName);

    // NOTE(review): the null checks on the second alias look unreachable unless
    // p.getTableId(null) can return non-null (see the guard above) — confirm.
    t2cost = table2Alias == null ? 0 : stats.get(table2Name).estimateScanCost();
    t2card =
        table2Alias == null
            ? 0
            : stats.get(table2Name).estimateTableCardinality(filterSelectivities.get(j.t2Alias));
    rightPkey = table2Alias == null ? false : isPkey(table2Alias, j.f2PureName);
  } else {
    // news is not empty -- figure best way to join j to news
    prevBest = pc.getOrder(news);

    // possible that we have not cached an answer, if subset
    // includes a cross product
    if (prevBest == null) {
      return null;
    }

    double prevBestCost = pc.getCost(news);
    int bestCard = pc.getCard(news);

    // estimate cost of right subtree
    if (doesJoin(prevBest, table1Alias)) { // j.t1 is in prevBest
      // left side just has the cost of whatever the cached subplan is
      t1cost = prevBestCost;
      t1card = bestCard;
      leftPkey = hasPkey(prevBest);

      t2cost = j.t2Alias == null ? 0 : stats.get(table2Name).estimateScanCost();
      t2card =
          j.t2Alias == null
              ? 0
              : stats
                  .get(table2Name)
                  .estimateTableCardinality(filterSelectivities.get(j.t2Alias));
      rightPkey = j.t2Alias == null ? false : isPkey(j.t2Alias, j.f2PureName);
    } else if (doesJoin(prevBest, j.t2Alias)) { // j.t2 is in prevBest (both shouldn't be)
      // here the cached subplan supplies the t2 side and t1 is a base-table scan
      t2cost = prevBestCost;
      t2card = bestCard;
      rightPkey = hasPkey(prevBest);

      t1cost = stats.get(table1Name).estimateScanCost();
      t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias));
      leftPkey = isPkey(j.t1Alias, j.f1PureName);
    } else {
      // don't consider this plan if one of j.t1 or j.t2
      // isn't a table joined in prevBest (cross product)
      return null;
    }
  }

  // case where prevbest is left
  double cost1 = estimateJoinCost(j, t1card, t2card, t1cost, t2cost);

  // Cost the swapped orientation too and keep whichever is cheaper.
  LogicalJoinNode j2 = j.swapInnerOuter();
  double cost2 = estimateJoinCost(j2, t2card, t1card, t2cost, t1cost);
  if (cost2 < cost1) {
    boolean tmp;
    j = j2;
    cost1 = cost2;
    tmp = rightPkey;
    rightPkey = leftPkey;
    leftPkey = tmp;
    // NOTE(review): the pkey flags are swapped together with j, but t1card/t2card are not,
    // and they are passed in their original order below — confirm this is intended.
  }

  // Prune: only report a plan that is strictly cheaper than the best seen so far.
  if (cost1 >= bestCostSoFar) return null;

  CostCard cc = new CostCard();

  cc.card = estimateJoinCardinality(j, t1card, t2card, leftPkey, rightPkey, stats);
  cc.cost = cost1;
  cc.plan = (Vector<LogicalJoinNode>) prevBest.clone();
  cc.plan.addElement(j); // prevbest is left -- add new join to end
  return cc;
}
/**
 * Convenience constructor: delegates to the three-argument constructor, passing the name the
 * catalog reports for {@code tableid} as the third argument.
 *
 * @param tid the transaction id forwarded to the three-argument constructor
 * @param tableid the id of the table, also used to look up the table's name in the catalog
 */
public SeqScan(TransactionId tid, int tableid) {
  this(tid, tableid, Database.getCatalog().getTableName(tableid));
}
/**
 * Looks up the name of the scanned table in the database catalog.
 *
 * @return the name the catalog reports for the table this operator scans; this should be the
 *     actual table name, not an alias
 */
public String getTableName() {
  final String catalogName = Database.getCatalog().getTableName(id);
  return catalogName;
}
/**
 * Creates a sequential scan over the specified table as a part of the specified transaction.
 * Records the table id and alias, and obtains an iterator over the table's file from the catalog
 * for this transaction.
 *
 * @param tid The transaction this scan is running as a part of.
 * @param tableid the table to scan.
 * @param tableAlias the alias of this table (needed by the parser); the returned tupleDesc should
 *     have fields with name tableAlias.fieldName (note: this class is not responsible for
 *     handling a case where tableAlias or fieldName are null. It shouldn't crash if they are, but
 *     the resulting name can be null.fieldName, tableAlias.null, or null.null).
 */
public SeqScan(TransactionId tid, int tableid, String tableAlias) {
  this.alias = tableAlias;
  this.id = tableid;
  this.dbiter =
      Database.getCatalog()
          .getDatabaseFile(tableid)
          .iterator(tid);
}
/** * Static method to generate a byte array corresponding to an empty HeapPage. Used to add new, * empty pages to the file. Passing the results of this method to the HeapPage constructor will * create a HeapPage with no valid tuples in it. * * @param tableid The id of the table that this empty page will belong to. * @return The returned ByteArray. */ public static byte[] createEmptyPageData(int tableid) { TupleDesc td = Database.getCatalog().getTupleDesc(tableid); // int hb = (((BufferPool.PAGE_SIZE / td.getSize()) / 32) +1) * 4; int len = BufferPool.PAGE_SIZE; // + hb; return new byte[len]; // all 0 }
/**
 * This is a helper method that computes the cost and cardinality of joining a LogicalJoinNode j
 * to the current greedy plan we have built up.
 *
 * <p>Both join orientations (j and j.swapInnerOuter()) are costed and the cheaper cost is
 * reported. Only {@code card} and {@code cost} of the returned CostCard are set here.
 *
 * @param j the join to try adding to our plan
 * @param plan the current plan we have built so far from the greedy algorithm, a Vector of
 *     LogicalJoinNodes that we've so far chosen.
 * @param planCardinalities given the join order from plan, we also keep track of how large joined
 *     tables are, so we can help estimate the cardinality and cost of this next join
 * @param planCosts given the join order from plan, we also keep track of how expensive executing
 *     some joins are, so we can help estimate the cardinality and cost of this next join
 * @param stats table stats for all of the tables, referenced by table names rather than alias
 *     (see {@link #orderGreedyJoins(HashMap, HashMap)})
 * @param filterSelectivities the selectivities of the filters over each of the tables (where
 *     tables are identified by their alias or name if no alias is given)
 * @return A {@link CostCard} object describing the cost and cardinality of the join
 * @throws ParsingException when either table alias of the join is unknown to the plan
 */
private CostCard costGreedyJoin(
    LogicalJoinNode j,
    Vector<LogicalJoinNode> plan,
    Vector<Integer> planCardinalities,
    Vector<Double> planCosts,
    HashMap<String, TableStats> stats,
    HashMap<String, Double> filterSelectivities)
    throws ParsingException {
  // Both aliases must resolve to a table id before anything is looked up by name.
  if (this.p.getTableId(j.t1Alias) == null)
    throw new ParsingException("Unknown table " + j.t1Alias);
  if (this.p.getTableId(j.t2Alias) == null)
    throw new ParsingException("Unknown table " + j.t2Alias);

  String table1Name = Database.getCatalog().getTableName(this.p.getTableId(j.t1Alias));
  String table2Name = Database.getCatalog().getTableName(this.p.getTableId(j.t2Alias));
  String table1Alias = j.t1Alias;
  String table2Alias = j.t2Alias;

  double t1cost, t2cost;
  int t1card, t2card;
  boolean leftPkey, rightPkey;

  // estimate cost of right subtree
  if (doesJoin(plan, table1Alias)) { // j.t1 is in plan already
    CostCard c = getCostCard(plan, planCardinalities, planCosts, table1Alias);
    t1cost = c.cost; // left side just has cost of whatever left subtree is
    t1card = c.card;
    leftPkey = hasPkey(plan);

    // NOTE(review): the null checks on the second alias look unreachable unless
    // p.getTableId(null) can return non-null (see the guard above) — confirm.
    t2cost = j.t2Alias == null ? 0 : stats.get(table2Name).estimateScanCost();
    t2card =
        j.t2Alias == null
            ? 0
            : stats.get(table2Name).estimateTableCardinality(filterSelectivities.get(j.t2Alias));
    rightPkey = j.t2Alias == null ? false : isPkey(j.t2Alias, j.f2PureName);
  } else if (doesJoin(plan, j.t2Alias)) { // j.t2 is in plan
    // (else if since both j.t1 and j.t2 shouldn't both be)
    CostCard c = getCostCard(plan, planCardinalities, planCosts, table2Alias);
    t2cost = c.cost;
    t2card = c.card;
    rightPkey = hasPkey(plan);

    t1cost = stats.get(table1Name).estimateScanCost();
    t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias));
    leftPkey = isPkey(j.t1Alias, j.f1PureName);
  } else { // Neither is a plan, both are just single tables
    t1cost = stats.get(table1Name).estimateScanCost();
    t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias));
    leftPkey = isPkey(j.t1Alias, j.f1PureName);

    t2cost = table2Alias == null ? 0 : stats.get(table2Name).estimateScanCost();
    t2card =
        table2Alias == null
            ? 0
            : stats.get(table2Name).estimateTableCardinality(filterSelectivities.get(j.t2Alias));
    rightPkey = table2Alias == null ? false : isPkey(table2Alias, j.f2PureName);
  }

  double cost1 = estimateJoinCost(j, t1card, t2card, t1cost, t2cost);

  // Cost the swapped orientation too and keep whichever is cheaper.
  LogicalJoinNode j2 = j.swapInnerOuter();
  double cost2 = estimateJoinCost(j2, t2card, t1card, t2cost, t1cost);
  if (cost2 < cost1) {
    boolean tmp;
    j = j2;
    cost1 = cost2;
    tmp = rightPkey;
    rightPkey = leftPkey;
    leftPkey = tmp;
    // NOTE(review): the pkey flags are swapped together with j, but t1card/t2card are not,
    // and they are passed in their original order below — confirm this is intended.
  }

  CostCard cc = new CostCard();
  cc.card = estimateJoinCardinality(j, t1card, t2card, leftPkey, rightPkey, stats);
  cc.cost = cost1;
  return cc;
}
/**
 * Tells whether the given field is the primary key of the table behind the given alias, by
 * comparing it with the primary key the catalog reports for that table.
 *
 * @param tableAlias The alias of the table in the query
 * @param field The pure name of the field
 * @return true if the catalog's primary key for the table equals {@code field}, false otherwise
 */
private boolean isPkey(String tableAlias, String field) {
  final int resolvedTableId = p.getTableId(tableAlias);
  return Database.getCatalog().getPrimaryKey(resolvedTableId).equals(field);
}
/**
 * Helper function to display a Swing window with a tree representation of the specified list of
 * joins. See {@link #orderJoins}, which may want to call this when the analyze flag is true.
 *
 * <p>Besides opening the window, this prints the join list and the growing join path to standard
 * output. Each join becomes a tree node labelled with the cost and cardinality that pc holds for
 * the joins processed so far; tables seen for the first time become leaf nodes labelled with
 * their scan cost and estimated cardinality.
 *
 * @param js the join plan to visualize
 * @param pc the PlanCache accumulated while building the optimal plan
 * @param stats table statistics for base tables
 * @param selectivities the selectivities of the filters over each of the tables (where tables are
 *     identified by their alias or name if no alias is given)
 */
private void printJoins(
    Vector<LogicalJoinNode> js,
    PlanCache pc,
    HashMap<String, TableStats> stats,
    HashMap<String, Double> selectivities) {
  JFrame f = new JFrame("Join Plan for " + p.getQuery());

  // Set the default close operation for the window,
  // or else the program won't exit when clicking close button
  f.setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE);

  f.setVisible(true);

  f.setSize(300, 500);

  // Maps a table alias to the tree node that currently stands for it: once a table has been
  // joined, that is the node of the join it took part in.
  HashMap<String, DefaultMutableTreeNode> m = new HashMap<String, DefaultMutableTreeNode>();

  DefaultMutableTreeNode root = null, treetop = null;
  HashSet<LogicalJoinNode> pathSoFar = new HashSet<LogicalJoinNode>();
  // True while neither side of the current join has been seen in an earlier join.
  boolean neither;

  System.out.println(js);
  for (LogicalJoinNode j : js) {
    pathSoFar.add(j);
    System.out.println("PATH SO FAR = " + pathSoFar);

    String table1Name = Database.getCatalog().getTableName(this.p.getTableId(j.t1Alias));
    String table2Name = Database.getCatalog().getTableName(this.p.getTableId(j.t2Alias));

    neither = true;

    // Node for this join, labelled with the cached cost/cardinality of the path so far.
    root =
        new DefaultMutableTreeNode(
            "Join "
                + j
                + " (Cost ="
                + pc.getCost(pathSoFar)
                + ", card = "
                + pc.getCard(pathSoFar)
                + ")");
    DefaultMutableTreeNode n = m.get(j.t1Alias);
    if (n == null) { // never seen this table before
      n =
          new DefaultMutableTreeNode(
              j.t1Alias
                  + " (Cost = "
                  + stats.get(table1Name).estimateScanCost()
                  + ", card = "
                  + stats.get(table1Name).estimateTableCardinality(selectivities.get(j.t1Alias))
                  + ")");
      root.add(n);
    } else {
      // seen before: hang the existing node (an earlier join) under this join as its left child
      root.add(n);
      neither = false;
    }
    m.put(j.t1Alias, root);

    n = m.get(j.t2Alias);
    if (n == null) { // never seen this table before
      n =
          new DefaultMutableTreeNode(
              j.t2Alias == null
                  ? "Subplan"
                  : (j.t2Alias
                      + " (Cost = "
                      + stats.get(table2Name).estimateScanCost()
                      + ", card = "
                      + stats
                          .get(table2Name)
                          .estimateTableCardinality(selectivities.get(j.t2Alias))
                      + ")"));
      root.add(n);
    } else {
      // seen before: hang the existing node under this join as its right child
      root.add(n);
      neither = false;
    }
    m.put(j.t2Alias, root);

    // unless this table doesn't join with other tables,
    // all tables are accessed from root
    if (!neither) {
      // Re-pointing existing keys only replaces values, so the key set is not structurally
      // modified while it is being iterated.
      for (String key : m.keySet()) {
        m.put(key, root);
      }
    }

    // The most recently processed join is the top of the displayed tree.
    treetop = root;
  }

  JTree tree = new JTree(treetop);
  JScrollPane treeView = new JScrollPane(tree);

  tree.setShowsRootHandles(true);

  // Use the join image for expanded and collapsed non-leaf nodes (the open/closed icons).
  ImageIcon leafIcon = new ImageIcon("join.jpg");
  DefaultTreeCellRenderer renderer = new DefaultTreeCellRenderer();
  renderer.setOpenIcon(leafIcon);
  renderer.setClosedIcon(leafIcon);

  tree.setCellRenderer(renderer);

  f.setSize(300, 500);

  f.add(treeView);
  // Expand every row; getRowCount() is re-read each pass because expanding a row adds rows.
  for (int i = 0; i < tree.getRowCount(); i++) {
    tree.expandRow(i);
  }

  if (js.size() == 0) {
    f.add(new JLabel("No joins in plan."));
  }

  f.pack();
}
/**
 * Opens this scan: obtains an iterator over the table's file from the catalog for this
 * transaction, stores it in {@code fileIt}, and opens it. Each call replaces {@code fileIt} with
 * a newly obtained iterator.
 *
 * @throws DbException if opening the file iterator fails
 * @throws TransactionAbortedException if the transaction is aborted while opening
 */
public void open() throws DbException, TransactionAbortedException {
  fileIt = Database.getCatalog().getDbFile(tableid).iterator(tid);
  fileIt.open();
}