/** Set up the test; create some initial tables to work with */
@Override
@Before
public void setUp() throws Exception {
    super.setUp();
    // Create some sample tables to work with
    this.tuples1 = new ArrayList<ArrayList<Integer>>();
    this.f1 = SystemTestUtil.createRandomHeapFile(10, 1000, 20, null, tuples1, "c");

    this.tableName1 = "TA";
    Database.getCatalog().addTable(f1, tableName1);
    this.tableId1 = Database.getCatalog().getTableId(tableName1);
    System.out.println("tableId1: " + tableId1);

    stats1 = new TableStats(tableId1, 19);
    TableStats.setTableStats(tableName1, stats1);

    this.tuples2 = new ArrayList<ArrayList<Integer>>();
    this.f2 = SystemTestUtil.createRandomHeapFile(10, 10000, 20, null, tuples2, "c");

    this.tableName2 = "TB";
    Database.getCatalog().addTable(f2, tableName2);
    this.tableId2 = Database.getCatalog().getTableId(tableName2);
    System.out.println("tableId2: " + tableId2);

    stats2 = new TableStats(tableId2, 19);
    TableStats.setTableStats(tableName2, stats2);
}
public static void computeStatistics() {
    Iterator<Integer> tableIt = Database.getCatalog().tableIdIterator();

    System.out.println("Computing table stats.");
    while (tableIt.hasNext()) {
        int tableid = tableIt.next();
        TableStats s = new TableStats(tableid, IOCOSTPERPAGE);
        setTableStats(Database.getCatalog().getTableName(tableid), s);
    }
    System.out.println("Done.");
}
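/*
 * A minimal sketch of how the computed statistics might be consulted once built, assuming the
 * standard TableStats accessors (getTableStats, estimateScanCost, estimateTableCardinality).
 * The schema path and the table name "emp" are placeholders, not values from this codebase.
 */
public static void statsUsageExample() {
    Database.getCatalog().loadSchema("catalog.schema"); // placeholder path
    TableStats.computeStatistics();

    TableStats empStats = TableStats.getTableStats("emp"); // placeholder table name
    double scanCost = empStats.estimateScanCost();     // full-scan cost, in IO units
    int card = empStats.estimateTableCardinality(0.1); // cardinality after a 10%-selective filter
    System.out.println("scan cost = " + scanCost + ", est. cardinality = " + card);
}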
/**
 * Remove the specified tuple from the buffer pool. Will acquire a write lock on the page the
 * tuple is removed from. May block if the lock cannot be acquired.
 *
 * <p>Marks any pages that were dirtied by the operation as dirty by calling markDirty on them.
 * Does not need to update cached versions of any pages that have been dirtied, as it is not
 * possible that a new page was created during the deletion (note the difference from addTuple).
 *
 * @param tid the transaction deleting the tuple.
 * @param t the tuple to delete
 */
public void deleteTuple(TransactionId tid, Tuple t)
        throws DbException, TransactionAbortedException {
    HeapFile heapFile =
            (HeapFile) Database.getCatalog().getDatabaseFile(t.getRecordId().getPageId().getTableId());
    Page dirtiedPage = heapFile.deleteTuple(tid, t);
    dirtiedPage.markDirty(true, tid);
}
/**
 * Add a tuple to the specified table on behalf of transaction tid. Will acquire a write lock on
 * the page the tuple is added to (lock acquisition is not needed for lab 2). May block if the
 * lock cannot be acquired.
 *
 * <p>Marks any pages that were dirtied by the operation as dirty by calling markDirty on them,
 * and updates cached versions of any pages that have been dirtied so that future requests see
 * up-to-date pages.
 *
 * @param tid the transaction adding the tuple
 * @param tableId the table to add the tuple to
 * @param t the tuple to add
 */
public void insertTuple(TransactionId tid, int tableId, Tuple t)
        throws DbException, IOException, TransactionAbortedException {
    HeapFile heapFile = (HeapFile) Database.getCatalog().getDatabaseFile(tableId);
    List<Page> dirtiedPages = heapFile.insertTuple(tid, t);
    for (Page dirtiedPage : dirtiedPages) {
        dirtiedPage.markDirty(true, tid);
    }
}
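/*
 * A minimal usage sketch of the two methods above, assuming the standard SimpleDB entry points
 * (Database.getBufferPool(), Database.getCatalog()). The table name "TA", the all-INT schema,
 * and the transactionComplete() commit hook (a later lab) are illustrative assumptions, not
 * part of the code above.
 */
public static void insertDeleteExample() throws Exception {
    TransactionId tid = new TransactionId();
    int tableId = Database.getCatalog().getTableId("TA"); // placeholder table name

    Tuple t = new Tuple(Database.getCatalog().getTupleDesc(tableId));
    for (int i = 0; i < t.getTupleDesc().numFields(); i++) {
        t.setField(i, new IntField(i)); // assumes every column is INT_TYPE
    }

    BufferPool pool = Database.getBufferPool();
    pool.insertTuple(tid, tableId, t); // sets t's RecordId and dirties the page(s) touched
    pool.deleteTuple(tid, t);          // uses that RecordId to find and dirty the page
    pool.transactionComplete(tid);     // commit (later labs): release locks, flush as configured
}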
/**
 * Returns the TupleDesc with field names from the underlying HeapFile, prefixed with the
 * tableAlias string from the constructor. This prefix becomes useful when joining tables
 * containing a field(s) with the same name.
 *
 * @return the TupleDesc with field names from the underlying HeapFile, prefixed with the
 *     tableAlias string from the constructor.
 */
public TupleDesc getTupleDesc() {
    TupleDesc tup = Database.getCatalog().getTupleDesc(id);
    int length = tup.numFields();
    String[] field = new String[length];
    Type[] types = new Type[length];
    for (int i = 0; i < length; i++) {
        types[i] = tup.getFieldType(i);
        field[i] = alias + "." + tup.getFieldName(i);
    }
    return new TupleDesc(types, field);
}
public void open() throws DbException, TransactionAbortedException {
    if (i_pos != null) {
        i = i_pos;
    } else {
        Catalog gc = Database.getCatalog();
        HeapFile file = (HeapFile) gc.getDbFile(tableid);
        i = file.iterator(tid);
    }
    i.open();
}
/**
 * Create a new TableStats object that keeps track of statistics on each column of a table.
 *
 * @param tableid The table over which to compute statistics
 * @param ioCostPerPage The cost per page of IO. This doesn't differentiate between
 *     sequential-scan IO and disk seeks.
 */
public TableStats(int tableid, int ioCostPerPage) {
    // Use the DbFile for the table in question, then scan through its tuples and
    // calculate the values needed to build the histograms.
    this.ioCostPerPage = ioCostPerPage;
    DbFile file = Database.getCatalog().getDbFile(tableid);
    tupleDesc = file.getTupleDesc();
    numPages = ((HeapFile) file).numPages();
    numTuples = 0;

    int numFields = tupleDesc.numFields();

    // One statistics object per column: integer statistics for INT columns,
    // a string histogram for everything else.
    statistics = new ArrayList<Object>();
    for (int i = 0; i < numFields; i++) {
        if (Type.INT_TYPE.equals(tupleDesc.getFieldType(i))) {
            statistics.add(new IntStatistics(NUM_HIST_BINS));
        } else {
            statistics.add(new StringHistogram(NUM_HIST_BINS));
        }
    }

    // Scan the table once, feeding every field of every tuple into its column's statistics.
    final DbFileIterator iter = file.iterator(null);
    try {
        iter.open();
        while (iter.hasNext()) {
            Tuple t = iter.next();
            numTuples++;
            for (int i = 0; i < numFields; i++) {
                if (Type.INT_TYPE.equals(tupleDesc.getFieldType(i))) {
                    ((IntStatistics) statistics.get(i)).addValue(((IntField) t.getField(i)).getValue());
                } else {
                    ((StringHistogram) statistics.get(i))
                            .addValue(((StringField) t.getField(i)).getValue());
                }
            }
        }
    } catch (DbException e) {
        e.printStackTrace();
    } catch (TransactionAbortedException e) {
        e.printStackTrace();
    } finally {
        iter.close();
    }
}
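/*
 * IntStatistics and StringHistogram are not shown above. As an illustration of the kind of
 * structure they maintain, here is a minimal equal-width integer histogram; the class name,
 * bucketing policy, and selectivity formula are illustrative assumptions, not the actual
 * classes used by TableStats.
 */
class SimpleIntHistogram {
    private final int[] buckets;
    private final int min;
    private final double width;
    private int total = 0;

    SimpleIntHistogram(int numBuckets, int min, int max) {
        this.buckets = new int[numBuckets];
        this.min = min;
        this.width = Math.max(1.0, (double) (max - min + 1) / numBuckets);
    }

    void addValue(int v) {
        buckets[Math.min(buckets.length - 1, (int) ((v - min) / width))]++;
        total++;
    }

    /** Estimated selectivity of (field = v), assuming values are uniform within a bucket. */
    double estimateEquals(int v) {
        int b = (int) ((v - min) / width);
        if (b < 0 || b >= buckets.length || total == 0) return 0.0;
        return (buckets[b] / width) / total;
    }
}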
/**
 * Flushes a certain page to disk.
 *
 * @param pageId an ID indicating the page to flush
 */
private synchronized void flushPage(PageId pageId) throws IOException {
    if (pageIdToPages.containsKey(pageId)) {
        Page page = pageIdToPages.get(pageId);
        // Append an update record to the log, with a before-image and after-image.
        TransactionId dirtier = page.isDirty();
        if (dirtier != null) {
            addDirtiedFlushedPage(dirtier, pageId);
            Database.getLogFile().logWrite(dirtier, page.getBeforeImage(), page);
            Database.getLogFile().force();
            Database.getCatalog().getDatabaseFile(pageId.getTableId()).writePage(page);
            page.markDirty(false, null);
        }
    }
}
/**
 * Retrieve the specified page with the associated permissions. Will acquire a lock and may block
 * if that lock is held by another transaction.
 *
 * <p>The retrieved page should be looked up in the buffer pool. If it is present, it should be
 * returned. If it is not present, it should be added to the buffer pool and returned. If there
 * is insufficient space in the buffer pool, a page should be evicted and the new page should be
 * added in its place.
 *
 * @param tid the ID of the transaction requesting the page
 * @param pid the ID of the requested page
 * @param perm the requested permissions on the page
 * @throws IOException
 * @throws NoSuchElementException
 */
public Page getPage(TransactionId tid, PageId pid, Permissions perm)
        throws TransactionAbortedException, DbException, NoSuchElementException, IOException {
    if (this.deadPool.containsKey(pid)) {
        return this.deadPool.get(pid);
    }
    // Eviction is not implemented here: despite the contract described above,
    // this version simply fails when the pool is full.
    if (this.deadPool.size() >= this.numPages) {
        throw new DbException("Too many pages!");
    }
    this.deadPool.put(pid, Database.getCatalog().getDatabaseFile(pid.getTableId()).readPage(pid));
    return this.deadPool.get(pid);
}
/**
 * Retrieve the specified page with the associated permissions. Will acquire a lock and may block
 * if that lock is held by another transaction.
 *
 * <p>The retrieved page should be looked up in the buffer pool. If it is present, it should be
 * returned. If it is not present, it should be added to the buffer pool and returned. If there
 * is insufficient space in the buffer pool, a page should be evicted and the new page should be
 * added in its place.
 *
 * @param tid the ID of the transaction requesting the page
 * @param pid the ID of the requested page
 * @param perm the requested permissions on the page
 * @throws DbException
 * @throws TransactionAbortedException
 */
public Page getPage(TransactionId tid, PageId pid, Permissions perm)
        throws DbException, TransactionAbortedException {
    lockManager.acquireLock(tid, pid, perm);
    if (pageIdToPages.containsKey(pid)) {
        return pageIdToPages.get(pid);
    }
    if (currentPages.get() == maxPages) {
        evictPage();
    }
    int tableId = pid.getTableId();
    Catalog catalog = Database.getCatalog();
    DbFile dbFile = catalog.getDatabaseFile(tableId);
    Page page = dbFile.readPage(pid);
    pageIdToPages.put(pid, page);
    currentPages.incrementAndGet();
    return page;
}
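/*
 * evictPage() is referenced above but not shown. A minimal sketch of one common policy --
 * evict the first clean page found, never a dirty one (NO STEAL) -- reusing the surrounding
 * class's pageIdToPages map and currentPages counter. The policy choice is an assumption,
 * not necessarily what this codebase implements.
 */
private synchronized void evictPage() throws DbException {
    for (PageId pid : pageIdToPages.keySet()) {
        if (pageIdToPages.get(pid).isDirty() == null) { // clean page: safe to drop
            pageIdToPages.remove(pid);
            currentPages.decrementAndGet();
            return;
        }
    }
    throw new DbException("All pages in the buffer pool are dirty; nothing can be evicted.");
}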
private void createAliasedTd() {
    Catalog gc = Database.getCatalog();
    TupleDesc old_td = gc.getTupleDesc(tableid);
    String[] newFieldAr = new String[old_td.numFields()];
    Type[] typeAr = new Type[old_td.numFields()];
    String field = null;
    // Substitute "null" for a missing alias once, up front. Note the field-name check below
    // must be independent of this one (not an else-if), or a null field name would be left
    // unsubstituted whenever the alias is also null.
    if (alias == null) {
        alias = "null";
    }
    for (int i = 0; i < newFieldAr.length; i++) {
        field = old_td.getFieldName(i);
        if (field == null) {
            field = "null";
        }
        newFieldAr[i] = alias + "." + field;
        typeAr[i] = old_td.getFieldType(i);
    }
    td = new TupleDesc(typeAr, newFieldAr);
}
/**
 * Create a HeapPage from a set of bytes of data read from disk. The format of a HeapPage is a
 * set of header bytes indicating which slots of the page are in use, followed by some number of
 * tuple slots. Specifically, the number of tuple slots is equal to:
 *
 * <p>floor((BufferPool.PAGE_SIZE * 8) / (tuple size * 8 + 1))
 *
 * <p>where tuple size is the size of tuples in this database table, which can be determined via
 * {@link Catalog#getTupleDesc}. The number of 8-bit header words (i.e. header bytes) is equal to:
 *
 * <p>ceiling(no. tuple slots / 8)
 *
 * @see Database#getCatalog
 * @see Catalog#getTupleDesc
 * @see BufferPool#PAGE_SIZE
 */
public HeapPage(HeapPageId id, byte[] data) throws IOException {
    this.pid = id;
    this.td = Database.getCatalog().getTupleDesc(id.getTableId());
    this.numSlots = getNumTuples();
    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(data));

    // Allocate and read the header slots of this page.
    header = new byte[getHeaderSize()];
    for (int i = 0; i < header.length; i++) {
        header[i] = dis.readByte();
    }

    try {
        // Allocate and read the actual records of this page.
        tuples = new Tuple[numSlots];
        for (int i = 0; i < tuples.length; i++) {
            tuples[i] = readNextTuple(dis, i);
        }
    } catch (NoSuchElementException e) {
        e.printStackTrace();
    }
    dis.close();

    setBeforeImage();
}
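/*
 * A worked instance of the two formulas above. The 4096-byte page and 8-byte tuple (e.g., two
 * INT fields) are example values, not constants taken from this codebase.
 */
public static void headerMathExample() {
    int pageSize = 4096; // example BufferPool.PAGE_SIZE
    int tupleSize = 8;   // example tuple size in bytes

    int numSlots = (pageSize * 8) / (tupleSize * 8 + 1); // floor(32768 / 65) = 504
    int headerBytes = (int) Math.ceil(numSlots / 8.0);   // ceiling(504 / 8) = 63

    // Sanity check: header plus tuple data must fit in the page: 63 + 504*8 = 4095 <= 4096.
    System.out.println("slots=" + numSlots + ", header=" + headerBytes + " bytes");
}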
/**
 * Create a HeapPage from a set of bytes of data read from disk. The format of a HeapPage is a
 * set of 32-bit header words indicating which slots of the page are in use, plus
 * (BufferPool.PAGE_SIZE / tuple size) tuple slots, where tuple size is the size of tuples in
 * this database table, which can be determined via {@link Catalog#getTupleDesc}.
 *
 * <p>The number of 32-bit header words is equal to:
 *
 * <p>(no. tuple slots / 32) + 1
 *
 * @see Database#getCatalog
 * @see Catalog#getTupleDesc
 * @see BufferPool#PAGE_SIZE
 */
public HeapPage(HeapPageId id, byte[] data) throws IOException {
    this.pid = id;
    this.td = Database.getCatalog().getTupleDesc(id.tableid());
    this.numSlots = (BufferPool.PAGE_SIZE * 8) / ((td.getSize() * 8) + 1);
    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(data));

    // Allocate and read the header slots of this page.
    header = new Header(dis);

    try {
        // Allocate and read the actual records of this page.
        tuples = new Tuple[numSlots];
        for (int i = 0; i < numSlots; i++) {
            tuples[i] = readNextTuple(dis, i);
        }
    } catch (NoSuchElementException e) {
        // Reached the end of the valid tuples on this page; remaining slots stay null.
    }
    dis.close();
}
/**
 * Returns the TupleDesc with field names from the underlying HeapFile, prefixed with the
 * tableAlias string from the constructor. This prefix becomes useful when joining tables
 * containing a field(s) with the same name.
 *
 * @return the TupleDesc with field names from the underlying HeapFile, prefixed with the
 *     tableAlias string from the constructor.
 */
public TupleDesc getTupleDesc() {
    TupleDesc td = Database.getCatalog().getTupleDesc(tableid);
    Iterator<TDItem> tdIter = td.iterator();
    int size = td.numFields();
    Type[] typeAr = new Type[size];
    String[] fieldAr = new String[size];
    String aliasString = this.tableAlias;
    int count = 0;
    if (aliasString == null) {
        aliasString = "null";
    }
    while (tdIter.hasNext()) {
        TDItem item = tdIter.next();
        Type fieldType = item.fieldType;
        String fieldName = item.fieldName;
        if (fieldName == null) {
            fieldName = "null";
        }
        typeAr[count] = fieldType;
        // Note: a "null.null" name may occur when both the alias and the field name are null.
        fieldAr[count] = aliasString + "." + fieldName;
        count++;
    }
    return new TupleDesc(typeAr, fieldAr);
}
/**
 * Creates a sequential scan over the specified table as a part of the specified transaction.
 *
 * @param tid The transaction this scan is running as a part of.
 * @param tableid the table to scan.
 * @param tableAlias the alias of this table (needed by the parser); the returned tupleDesc
 *     should have fields with name tableAlias.fieldName (note: this class is not responsible
 *     for handling a case where tableAlias or fieldName are null. It shouldn't crash if they
 *     are, but the resulting name can be null.fieldName, tableAlias.null, or null.null).
 */
public SeqScan(TransactionId tid, int tableid, String tableAlias) {
    alias = tableAlias;
    id = tableid;
    dbiter = Database.getCatalog().getDatabaseFile(id).iterator(tid);
}
/**
 * Static method to generate a byte array corresponding to an empty HeapPage. Used to add new,
 * empty pages to the file. Passing the results of this method to the HeapPage constructor will
 * create a HeapPage with no valid tuples in it.
 *
 * @param tableid The id of the table that this empty page will belong to.
 * @return a byte array of BufferPool.PAGE_SIZE zero bytes.
 */
public static byte[] createEmptyPageData(int tableid) {
    TupleDesc td = Database.getCatalog().getTupleDesc(tableid);
    // int hb = (((BufferPool.PAGE_SIZE / td.getSize()) / 32) + 1) * 4;
    int len = BufferPool.PAGE_SIZE; // + hb;
    return new byte[len]; // all 0
}
public static void main(String[] args) throws Exception {
    if (args.length < 1 || args.length > 5) {
        System.out.println("Invalid number of arguments.\n" + usage);
        return;
    }
    String confDir = Server.DEFAULT_CONF_DIR;
    String outputDir = DEFAULT_OUTPUT_DIR;
    if (args.length >= 3 && args[1].equals("--conf")) {
        confDir = args[2];
        args = ParallelUtility.removeArg(args, 1);
        args = ParallelUtility.removeArg(args, 1);
    }
    if (args.length >= 3 && args[1].equals("--output")) {
        outputDir = args[2];
        args = ParallelUtility.removeArg(args, 1);
        args = ParallelUtility.removeArg(args, 1);
    }

    Catalog c = Database.getCatalog();
    SocketInfo[] workers = ParallelUtility.loadWorkers(confDir);
    c.loadSchema(args[0]);
    TableStats.computeStatistics();

    // Copy the catalog schema into every worker's output folder.
    File catalogFile = new File(args[0]);
    for (SocketInfo worker : workers) {
        File folder = new File(outputDir + "/" + worker.getHost() + "_" + worker.getPort());
        folder.mkdirs();
        ParallelUtility.copyFileFolder(
                catalogFile, new File(folder.getAbsolutePath() + "/catalog.schema"), true);
    }

    TransactionId fateTid = new TransactionId();

    Iterator<Integer> tableIds = c.tableIdIterator();
    while (tableIds.hasNext()) {
        int tableid = tableIds.next();
        int numTuples = getTotalTuples(tableid);
        HeapFile h = (HeapFile) c.getDatabaseFile(tableid);

        // Split the table's tuples as evenly as possible across the workers.
        int eachSplitSize = numTuples / workers.length;
        int[] splitSizes = new int[workers.length];
        Arrays.fill(splitSizes, eachSplitSize);
        for (int i = 0; i < numTuples % workers.length; i++) {
            splitSizes[i] += 1;
        }

        DbFileIterator dfi = h.iterator(fateTid);
        dfi.open();
        for (int i = 0; i < workers.length; i++) {
            ArrayList<Tuple> buffer = new ArrayList<Tuple>();
            for (int j = 0; j < splitSizes[i]; j++) {
                // Advance the iterator's cursor; the split sizes sum to numTuples,
                // so next() will not run off the end.
                dfi.hasNext();
                buffer.add(dfi.next());
            }
            Iterator<TDItem> items = h.getTupleDesc().iterator();
            ArrayList<Type> types = new ArrayList<Type>();
            while (items.hasNext()) {
                types.add(items.next().fieldType);
            }
            writeHeapFile(
                    buffer,
                    new File(
                            outputDir + "/" + workers[i].getHost() + "_" + workers[i].getPort()
                                    + "/" + c.getTableName(tableid) + ".dat"),
                    BufferPool.getPageSize(),
                    types.toArray(new Type[] {}));
        }
    }
}
/**
 * This is a helper method that computes the cost and cardinality of joining a LogicalJoinNode j
 * to the current greedy plan we have built up.
 *
 * @param j the join to try adding to our plan
 * @param plan the current plan we have built so far from the greedy algorithm, a Vector of
 *     LogicalJoinNodes that we've so far chosen.
 * @param planCardinalities given the join order from plan, we also keep track of how large
 *     joined tables are, so we can help estimate the cardinality and cost of this next join
 * @param planCosts given the join order from plan, we also keep track of how expensive executing
 *     some joins are, so we can help estimate the cardinality and cost of this next join
 * @param stats table stats for all of the tables, referenced by table names rather than alias
 *     (see {@link #orderGreedyJoins(HashMap, HashMap)})
 * @param filterSelectivities the selectivities of the filters over each of the tables (where
 *     tables are identified by their alias or name if no alias is given)
 * @return a {@link CostCard} object describing the cost, cardinality, optimal subplan
 * @throws ParsingException when stats, filterSelectivities, or pc object is missing tables
 *     involved in join
 */
private CostCard costGreedyJoin(
        LogicalJoinNode j,
        Vector<LogicalJoinNode> plan,
        Vector<Integer> planCardinalities,
        Vector<Double> planCosts,
        HashMap<String, TableStats> stats,
        HashMap<String, Double> filterSelectivities)
        throws ParsingException {
    if (this.p.getTableId(j.t1Alias) == null)
        throw new ParsingException("Unknown table " + j.t1Alias);
    if (this.p.getTableId(j.t2Alias) == null)
        throw new ParsingException("Unknown table " + j.t2Alias);

    String table1Name = Database.getCatalog().getTableName(this.p.getTableId(j.t1Alias));
    String table2Name = Database.getCatalog().getTableName(this.p.getTableId(j.t2Alias));
    String table1Alias = j.t1Alias;
    String table2Alias = j.t2Alias;

    double t1cost, t2cost;
    int t1card, t2card;
    boolean leftPkey, rightPkey;

    // Estimate the cost of the right subtree.
    if (doesJoin(plan, table1Alias)) { // j.t1 is in the plan already
        CostCard c = getCostCard(plan, planCardinalities, planCosts, table1Alias);
        t1cost = c.cost; // left side just has the cost of whatever the left subtree is
        t1card = c.card;
        leftPkey = hasPkey(plan);

        t2cost = j.t2Alias == null ? 0 : stats.get(table2Name).estimateScanCost();
        t2card =
                j.t2Alias == null
                        ? 0
                        : stats.get(table2Name).estimateTableCardinality(filterSelectivities.get(j.t2Alias));
        rightPkey = j.t2Alias == null ? false : isPkey(j.t2Alias, j.f2PureName);
    } else if (doesJoin(plan, j.t2Alias)) {
        // j.t2 is in the plan ("else if" since j.t1 and j.t2 shouldn't both be)
        CostCard c = getCostCard(plan, planCardinalities, planCosts, table2Alias);
        t2cost = c.cost;
        t2card = c.card;
        rightPkey = hasPkey(plan);

        t1cost = stats.get(table1Name).estimateScanCost();
        t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias));
        leftPkey = isPkey(j.t1Alias, j.f1PureName);
    } else {
        // Neither table is in the plan; both are just single tables.
        t1cost = stats.get(table1Name).estimateScanCost();
        t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias));
        leftPkey = isPkey(j.t1Alias, j.f1PureName);

        t2cost = table2Alias == null ? 0 : stats.get(table2Name).estimateScanCost();
        t2card =
                table2Alias == null
                        ? 0
                        : stats.get(table2Name).estimateTableCardinality(filterSelectivities.get(j.t2Alias));
        rightPkey = table2Alias == null ? false : isPkey(table2Alias, j.f2PureName);
    }

    double cost1 = estimateJoinCost(j, t1card, t2card, t1cost, t2cost);

    LogicalJoinNode j2 = j.swapInnerOuter();
    double cost2 = estimateJoinCost(j2, t2card, t1card, t2cost, t1cost);
    if (cost2 < cost1) {
        boolean tmp;
        j = j2;
        cost1 = cost2;
        tmp = rightPkey;
        rightPkey = leftPkey;
        leftPkey = tmp;
    }

    CostCard cc = new CostCard();
    cc.card = estimateJoinCardinality(j, t1card, t2card, leftPkey, rightPkey, stats);
    cc.cost = cost1;
    return cc;
}
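/*
 * estimateJoinCost() is not shown above. For reference, the usual nested-loops cost model from
 * the lab handout is cost(j) = t1cost + t1card * t2cost + t1card * t2card (scan the outer once,
 * the inner once per outer tuple, plus per-pair CPU cost); treat the exact formula as an
 * assumption. A worked instance with made-up numbers shows why swapInnerOuter() is tried:
 */
public static void joinCostExample() {
    double t1cost = 100.0, t2cost = 1000.0; // scan costs
    int t1card = 50, t2card = 2000;         // cardinalities

    double asWritten = t1cost + t1card * t2cost + (double) t1card * t2card; // 150100.0
    double swapped   = t2cost + t2card * t1cost + (double) t2card * t1card; // 301000.0

    // Keeping the smaller relation on the outside is cheaper here, which is exactly
    // the comparison the code above performs between cost1 and cost2.
    System.out.println("as written: " + asWritten + ", swapped: " + swapped);
}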
/**
 * Test a join ordering with an inequality, to make sure the inequality gets put as the innermost
 * join.
 */
@Test
public void nonequalityOrderJoinsTest() throws IOException, ParsingException {
    final int IO_COST = 103;

    JoinOptimizer j;
    HashMap<String, TableStats> stats = new HashMap<String, TableStats>();
    Vector<LogicalJoinNode> result;
    Vector<LogicalJoinNode> nodes = new Vector<LogicalJoinNode>();
    HashMap<String, Double> filterSelectivities = new HashMap<String, Double>();
    TransactionId tid = new TransactionId();

    // Create a large set of tables, and add tuples to the tables.
    ArrayList<ArrayList<Integer>> smallHeapFileTuples = new ArrayList<ArrayList<Integer>>();
    HeapFile smallHeapFileA =
            SystemTestUtil.createRandomHeapFile(2, 100, Integer.MAX_VALUE, null, smallHeapFileTuples, "c");
    HeapFile smallHeapFileB = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileC = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileD = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileE = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileF = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileG = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileH = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileI = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");

    // Add the tables to the database.
    Database.getCatalog().addTable(smallHeapFileA, "a");
    Database.getCatalog().addTable(smallHeapFileB, "b");
    Database.getCatalog().addTable(smallHeapFileC, "c");
    Database.getCatalog().addTable(smallHeapFileD, "d");
    Database.getCatalog().addTable(smallHeapFileE, "e");
    Database.getCatalog().addTable(smallHeapFileF, "f");
    Database.getCatalog().addTable(smallHeapFileG, "g");
    Database.getCatalog().addTable(smallHeapFileH, "h");
    Database.getCatalog().addTable(smallHeapFileI, "i");

    // Come up with join statistics for the tables.
    stats.put("a", new TableStats(smallHeapFileA.getId(), IO_COST));
    stats.put("b", new TableStats(smallHeapFileB.getId(), IO_COST));
    stats.put("c", new TableStats(smallHeapFileC.getId(), IO_COST));
    stats.put("d", new TableStats(smallHeapFileD.getId(), IO_COST));
    stats.put("e", new TableStats(smallHeapFileE.getId(), IO_COST));
    stats.put("f", new TableStats(smallHeapFileF.getId(), IO_COST));
    stats.put("g", new TableStats(smallHeapFileG.getId(), IO_COST));
    stats.put("h", new TableStats(smallHeapFileH.getId(), IO_COST));
    stats.put("i", new TableStats(smallHeapFileI.getId(), IO_COST));

    // Put in some filter selectivities.
    filterSelectivities.put("a", Double.valueOf(1.0));
    filterSelectivities.put("b", Double.valueOf(1.0));
    filterSelectivities.put("c", Double.valueOf(1.0));
    filterSelectivities.put("d", Double.valueOf(1.0));
    filterSelectivities.put("e", Double.valueOf(1.0));
    filterSelectivities.put("f", Double.valueOf(1.0));
    filterSelectivities.put("g", Double.valueOf(1.0));
    filterSelectivities.put("h", Double.valueOf(1.0));
    filterSelectivities.put("i", Double.valueOf(1.0));

    // Add the nodes to a collection for a query plan.
    nodes.add(new LogicalJoinNode("a", "b", "c1", "c1", Predicate.Op.LESS_THAN));
    nodes.add(new LogicalJoinNode("b", "c", "c0", "c0", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("c", "d", "c1", "c1", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("d", "e", "c0", "c0", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("e", "f", "c1", "c1", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("f", "g", "c0", "c0", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("g", "h", "c1", "c1", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("h", "i", "c0", "c0", Predicate.Op.EQUALS));

    Parser p = new Parser();

    // Run the optimizer; see what results we get back.
    j = new JoinOptimizer(
            p.generateLogicalPlan(
                    tid,
                    "SELECT COUNT(a.c0) FROM a, b, c, d,e,f,g,h,i WHERE a.c1 < b.c1 AND b.c0 = c.c0 AND c.c1 = d.c1 AND d.c0 = e.c0 AND e.c1 = f.c1 AND f.c0 = g.c0 AND g.c1 = h.c1 AND h.c0 = i.c0;"),
            nodes);

    // Set the last boolean here to 'true' in order to have orderJoins()
    // print out its logic.
    result = j.orderJoins(stats, filterSelectivities, false);

    // If you're only re-ordering the join nodes,
    // you shouldn't end up with more than you started with.
    Assert.assertEquals(result.size(), nodes.size());

    // Make sure that "a" is the outermost table in the join.
    Assert.assertTrue(
            result.get(result.size() - 1).t2Alias.equals("a")
                    || result.get(result.size() - 1).t1Alias.equals("a"));
}
/**
 * Return true if field is a primary key of the specified table, false otherwise.
 *
 * @param tableAlias The alias of the table in the query
 * @param field The pure name of the field
 */
private boolean isPkey(String tableAlias, String field) {
    int tid1 = p.getTableId(tableAlias);
    String pkey1 = Database.getCatalog().getPrimaryKey(tid1);
    // Guard against tables with no declared primary key.
    return pkey1 != null && pkey1.equals(field);
}
/**
 * @return the table name of the table the operator scans. This should be the actual name of the
 *     table in the catalog of the database
 */
public String getTableName() {
    return Database.getCatalog().getTableName(id);
}
public SeqScan(TransactionId tid, int tableid) {
    this(tid, tableid, Database.getCatalog().getTableName(tableid));
}
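/*
 * A minimal usage sketch of SeqScan's iterator protocol (open / hasNext / next / close). The
 * table name "TA" is a placeholder, and exception handling is elided.
 */
public static void scanExample() throws Exception {
    TransactionId tid = new TransactionId();
    int tableid = Database.getCatalog().getTableId("TA"); // placeholder table name
    SeqScan scan = new SeqScan(tid, tableid);

    scan.open();
    while (scan.hasNext()) {
        Tuple t = scan.next(); // field names come back prefixed per getTupleDesc()
        System.out.println(t);
    }
    scan.close();
}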
/**
 * Determine whether the orderJoins implementation is doing a reasonable job of ordering joins,
 * and not taking an unreasonable amount of time to do so.
 */
@Test
public void orderJoinsTest() throws ParsingException, IOException {
    // This test is intended to approximate the join described in the "Query Planning" section
    // of 2009 Quiz 1, though with some minor variation due to limitations in simpledb and to
    // only test your integer-heuristic code rather than string-heuristic code.
    final int IO_COST = 101;

    // Create a whole bunch of variables that we're going to use.
    TransactionId tid = new TransactionId();
    JoinOptimizer j;
    Vector<LogicalJoinNode> result;
    Vector<LogicalJoinNode> nodes = new Vector<LogicalJoinNode>();
    HashMap<String, TableStats> stats = new HashMap<String, TableStats>();
    HashMap<String, Double> filterSelectivities = new HashMap<String, Double>();

    // Create all of the tables, and add them to the catalog.
    ArrayList<ArrayList<Integer>> empTuples = new ArrayList<ArrayList<Integer>>();
    HeapFile emp = SystemTestUtil.createRandomHeapFile(6, 100000, null, empTuples, "c");
    Database.getCatalog().addTable(emp, "emp");

    ArrayList<ArrayList<Integer>> deptTuples = new ArrayList<ArrayList<Integer>>();
    HeapFile dept = SystemTestUtil.createRandomHeapFile(3, 1000, null, deptTuples, "c");
    Database.getCatalog().addTable(dept, "dept");

    ArrayList<ArrayList<Integer>> hobbyTuples = new ArrayList<ArrayList<Integer>>();
    HeapFile hobby = SystemTestUtil.createRandomHeapFile(6, 1000, null, hobbyTuples, "c");
    Database.getCatalog().addTable(hobby, "hobby");

    ArrayList<ArrayList<Integer>> hobbiesTuples = new ArrayList<ArrayList<Integer>>();
    HeapFile hobbies = SystemTestUtil.createRandomHeapFile(2, 200000, null, hobbiesTuples, "c");
    Database.getCatalog().addTable(hobbies, "hobbies");

    // Get TableStats objects for each of the tables that we just generated.
    stats.put("emp", new TableStats(Database.getCatalog().getTableId("emp"), IO_COST));
    stats.put("dept", new TableStats(Database.getCatalog().getTableId("dept"), IO_COST));
    stats.put("hobby", new TableStats(Database.getCatalog().getTableId("hobby"), IO_COST));
    stats.put("hobbies", new TableStats(Database.getCatalog().getTableId("hobbies"), IO_COST));

    // Note that your code shouldn't re-compute selectivities. If you get statistics numbers,
    // even if they're wrong (which they are here because the data is random), you should use
    // the numbers that you are given. Re-computing them at runtime is generally too expensive
    // for complex queries.
    filterSelectivities.put("emp", Double.valueOf(0.1));
    filterSelectivities.put("dept", Double.valueOf(1.0));
    filterSelectivities.put("hobby", Double.valueOf(1.0));
    filterSelectivities.put("hobbies", Double.valueOf(1.0));

    // Note that there's no particular guarantee that the LogicalJoinNodes will be in the same
    // order as they were written in the query. They just have to be in an order that uses the
    // same operators and semantically means the same thing.
    nodes.add(new LogicalJoinNode("hobbies", "hobby", "c1", "c0", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("emp", "dept", "c1", "c0", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("emp", "hobbies", "c2", "c0", Predicate.Op.EQUALS));

    Parser p = new Parser();
    j = new JoinOptimizer(
            p.generateLogicalPlan(
                    tid,
                    "SELECT * FROM emp,dept,hobbies,hobby WHERE emp.c1 = dept.c0 AND hobbies.c0 = emp.c2 AND hobbies.c1 = hobby.c0 AND e.c3 < 1000;"),
            nodes);

    // Set the last boolean here to 'true' in order to have orderJoins()
    // print out its logic.
    result = j.orderJoins(stats, filterSelectivities, false);

    // There are only three join nodes; if you're only re-ordering the join nodes,
    // you shouldn't end up with more than you started with.
    Assert.assertEquals(result.size(), nodes.size());

    // There were a number of ways to do the query in this quiz reasonably well; we're just
    // doing a heuristics-based optimizer, so only rule out the really bad case where "hobbies"
    // is the outermost node in the left-deep tree.
    Assert.assertFalse("hobbies".equals(result.get(0).t1Alias));

    // Also check for some of the other silly cases, like forcing a cross join by "hobbies"
    // only being at the two extremes, or "hobbies" being the outermost table.
    Assert.assertFalse(
            "hobbies".equals(result.get(2).t2Alias)
                    && ("hobbies".equals(result.get(0).t1Alias)
                            || "hobbies".equals(result.get(0).t2Alias)));
}
/**
 * This is a helper method that computes the cost and cardinality of joining joinToRemove to
 * joinSet (joinSet should contain joinToRemove), given that all of the subsets of size
 * joinSet.size() - 1 have already been computed and stored in PlanCache pc.
 *
 * @param stats table stats for all of the tables, referenced by table names rather than alias
 *     (see {@link #orderJoins})
 * @param filterSelectivities the selectivities of the filters over each of the tables (where
 *     tables are identified by their alias or name if no alias is given)
 * @param joinToRemove the join to remove from joinSet
 * @param joinSet the set of joins being considered
 * @param bestCostSoFar the best way to join joinSet so far (minimum of previous invocations of
 *     computeCostAndCardOfSubplan for this joinSet, from returned CostCard)
 * @param pc the PlanCache for this join; should have subplans for all plans of size
 *     joinSet.size()-1
 * @return a {@link CostCard} object describing the cost, cardinality, optimal subplan
 * @throws ParsingException when stats, filterSelectivities, or pc object is missing tables
 *     involved in join
 */
@SuppressWarnings("unchecked")
private CostCard computeCostAndCardOfSubplan(
        HashMap<String, TableStats> stats,
        HashMap<String, Double> filterSelectivities,
        LogicalJoinNode joinToRemove,
        Set<LogicalJoinNode> joinSet,
        double bestCostSoFar,
        PlanCache pc)
        throws ParsingException {
    LogicalJoinNode j = joinToRemove;
    Vector<LogicalJoinNode> prevBest;

    if (this.p.getTableId(j.t1Alias) == null)
        throw new ParsingException("Unknown table " + j.t1Alias);
    if (this.p.getTableId(j.t2Alias) == null)
        throw new ParsingException("Unknown table " + j.t2Alias);

    String table1Name = Database.getCatalog().getTableName(this.p.getTableId(j.t1Alias));
    String table2Name = Database.getCatalog().getTableName(this.p.getTableId(j.t2Alias));
    String table1Alias = j.t1Alias;
    String table2Alias = j.t2Alias;

    Set<LogicalJoinNode> news = (Set<LogicalJoinNode>) ((HashSet<LogicalJoinNode>) joinSet).clone();
    news.remove(j);

    double t1cost, t2cost;
    int t1card, t2card;
    boolean leftPkey, rightPkey;

    if (news.isEmpty()) { // base case -- both are base relations
        prevBest = new Vector<LogicalJoinNode>();
        t1cost = stats.get(table1Name).estimateScanCost();
        t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias));
        leftPkey = isPkey(j.t1Alias, j.f1PureName);

        t2cost = table2Alias == null ? 0 : stats.get(table2Name).estimateScanCost();
        t2card =
                table2Alias == null
                        ? 0
                        : stats.get(table2Name).estimateTableCardinality(filterSelectivities.get(j.t2Alias));
        rightPkey = table2Alias == null ? false : isPkey(table2Alias, j.f2PureName);
    } else {
        // news is not empty -- figure out the best way to join j to news.
        prevBest = pc.getOrder(news);

        // It's possible that we have not cached an answer, if the subset includes a cross product.
        if (prevBest == null) {
            return null;
        }

        double prevBestCost = pc.getCost(news);
        int bestCard = pc.getCard(news);

        // Estimate the cost of the right subtree.
        if (doesJoin(prevBest, table1Alias)) { // j.t1 is in prevBest
            t1cost = prevBestCost; // left side just has the cost of whatever the left subtree is
            t1card = bestCard;
            leftPkey = hasPkey(prevBest);

            t2cost = j.t2Alias == null ? 0 : stats.get(table2Name).estimateScanCost();
            t2card =
                    j.t2Alias == null
                            ? 0
                            : stats.get(table2Name).estimateTableCardinality(filterSelectivities.get(j.t2Alias));
            rightPkey = j.t2Alias == null ? false : isPkey(j.t2Alias, j.f2PureName);
        } else if (doesJoin(prevBest, j.t2Alias)) {
            // j.t2 is in prevBest (both shouldn't be)
            t2cost = prevBestCost;
            t2card = bestCard;
            rightPkey = hasPkey(prevBest);

            t1cost = stats.get(table1Name).estimateScanCost();
            t1card = stats.get(table1Name).estimateTableCardinality(filterSelectivities.get(j.t1Alias));
            leftPkey = isPkey(j.t1Alias, j.f1PureName);
        } else {
            // Don't consider this plan if one of j.t1 or j.t2
            // isn't a table joined in prevBest (cross product).
            return null;
        }
    }

    // Case where prevBest is the left subtree.
    double cost1 = estimateJoinCost(j, t1card, t2card, t1cost, t2cost);

    LogicalJoinNode j2 = j.swapInnerOuter();
    double cost2 = estimateJoinCost(j2, t2card, t1card, t2cost, t1cost);
    if (cost2 < cost1) {
        boolean tmp;
        j = j2;
        cost1 = cost2;
        tmp = rightPkey;
        rightPkey = leftPkey;
        leftPkey = tmp;
    }
    if (cost1 >= bestCostSoFar) return null;

    CostCard cc = new CostCard();
    cc.card = estimateJoinCardinality(j, t1card, t2card, leftPkey, rightPkey, stats);
    cc.cost = cost1;
    cc.plan = (Vector<LogicalJoinNode>) prevBest.clone();
    cc.plan.addElement(j); // prevBest is left -- add the new join to the end
    return cc;
}
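/*
 * For context, a sketch of the Selinger-style dynamic program that drives
 * computeCostAndCardOfSubplan(), following the lab handout's pseudocode. It assumes the
 * enumerateSubsets() helper and the PlanCache addPlan()/getOrder() methods from the provided
 * skeleton; details may differ from the actual orderJoins() implementation.
 */
Vector<LogicalJoinNode> orderJoinsSketch(
        HashMap<String, TableStats> stats,
        HashMap<String, Double> filterSelectivities,
        Vector<LogicalJoinNode> joins)
        throws ParsingException {
    PlanCache pc = new PlanCache();
    for (int size = 1; size <= joins.size(); size++) {
        for (Set<LogicalJoinNode> subset : enumerateSubsets(joins, size)) {
            double bestCost = Double.MAX_VALUE;
            int bestCard = 0;
            Vector<LogicalJoinNode> bestPlan = null;
            for (LogicalJoinNode toRemove : subset) {
                CostCard cc = computeCostAndCardOfSubplan(
                        stats, filterSelectivities, toRemove, subset, bestCost, pc);
                if (cc != null) { // null means worse than bestCost, or a cross product
                    bestCost = cc.cost;
                    bestCard = cc.card;
                    bestPlan = cc.plan;
                }
            }
            if (bestPlan != null) {
                pc.addPlan(subset, bestCost, bestCard, bestPlan);
            }
        }
    }
    // The cached best plan for the full set of joins is the answer.
    return pc.getOrder(new HashSet<LogicalJoinNode>(joins));
}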
/**
 * Test a much-larger join ordering, to confirm that it executes in a reasonable amount of time.
 */
@Test(timeout = 60000)
public void bigOrderJoinsTest() throws IOException, ParsingException {
    final int IO_COST = 103;

    JoinOptimizer j;
    HashMap<String, TableStats> stats = new HashMap<String, TableStats>();
    Vector<LogicalJoinNode> result;
    Vector<LogicalJoinNode> nodes = new Vector<LogicalJoinNode>();
    HashMap<String, Double> filterSelectivities = new HashMap<String, Double>();
    TransactionId tid = new TransactionId();

    // Create a large set of tables, and add tuples to the tables.
    ArrayList<ArrayList<Integer>> smallHeapFileTuples = new ArrayList<ArrayList<Integer>>();
    HeapFile smallHeapFileA =
            SystemTestUtil.createRandomHeapFile(2, 100, Integer.MAX_VALUE, null, smallHeapFileTuples, "c");
    HeapFile smallHeapFileB = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileC = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileD = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileE = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileF = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileG = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileH = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileI = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileJ = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileK = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileL = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileM = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");
    HeapFile smallHeapFileN = createDuplicateHeapFile(smallHeapFileTuples, 2, "c");

    ArrayList<ArrayList<Integer>> bigHeapFileTuples = new ArrayList<ArrayList<Integer>>();
    for (int i = 0; i < 100000; i++) {
        bigHeapFileTuples.add(smallHeapFileTuples.get(i % 100));
    }
    HeapFile bigHeapFile = createDuplicateHeapFile(bigHeapFileTuples, 2, "c");

    // Add the tables to the database.
    Database.getCatalog().addTable(bigHeapFile, "bigTable");
    Database.getCatalog().addTable(smallHeapFileA, "a");
    Database.getCatalog().addTable(smallHeapFileB, "b");
    Database.getCatalog().addTable(smallHeapFileC, "c");
    Database.getCatalog().addTable(smallHeapFileD, "d");
    Database.getCatalog().addTable(smallHeapFileE, "e");
    Database.getCatalog().addTable(smallHeapFileF, "f");
    Database.getCatalog().addTable(smallHeapFileG, "g");
    Database.getCatalog().addTable(smallHeapFileH, "h");
    Database.getCatalog().addTable(smallHeapFileI, "i");
    Database.getCatalog().addTable(smallHeapFileJ, "j");
    Database.getCatalog().addTable(smallHeapFileK, "k");
    Database.getCatalog().addTable(smallHeapFileL, "l");
    Database.getCatalog().addTable(smallHeapFileM, "m");
    Database.getCatalog().addTable(smallHeapFileN, "n");

    // Come up with join statistics for the tables.
    stats.put("bigTable", new TableStats(bigHeapFile.getId(), IO_COST));
    stats.put("a", new TableStats(smallHeapFileA.getId(), IO_COST));
    stats.put("b", new TableStats(smallHeapFileB.getId(), IO_COST));
    stats.put("c", new TableStats(smallHeapFileC.getId(), IO_COST));
    stats.put("d", new TableStats(smallHeapFileD.getId(), IO_COST));
    stats.put("e", new TableStats(smallHeapFileE.getId(), IO_COST));
    stats.put("f", new TableStats(smallHeapFileF.getId(), IO_COST));
    stats.put("g", new TableStats(smallHeapFileG.getId(), IO_COST));
    stats.put("h", new TableStats(smallHeapFileH.getId(), IO_COST));
    stats.put("i", new TableStats(smallHeapFileI.getId(), IO_COST));
    stats.put("j", new TableStats(smallHeapFileJ.getId(), IO_COST));
    stats.put("k", new TableStats(smallHeapFileK.getId(), IO_COST));
    stats.put("l", new TableStats(smallHeapFileL.getId(), IO_COST));
    stats.put("m", new TableStats(smallHeapFileM.getId(), IO_COST));
    stats.put("n", new TableStats(smallHeapFileN.getId(), IO_COST));

    // Put in some filter selectivities.
    filterSelectivities.put("bigTable", Double.valueOf(1.0));
    filterSelectivities.put("a", Double.valueOf(1.0));
    filterSelectivities.put("b", Double.valueOf(1.0));
    filterSelectivities.put("c", Double.valueOf(1.0));
    filterSelectivities.put("d", Double.valueOf(1.0));
    filterSelectivities.put("e", Double.valueOf(1.0));
    filterSelectivities.put("f", Double.valueOf(1.0));
    filterSelectivities.put("g", Double.valueOf(1.0));
    filterSelectivities.put("h", Double.valueOf(1.0));
    filterSelectivities.put("i", Double.valueOf(1.0));
    filterSelectivities.put("j", Double.valueOf(1.0));
    filterSelectivities.put("k", Double.valueOf(1.0));
    filterSelectivities.put("l", Double.valueOf(1.0));
    filterSelectivities.put("m", Double.valueOf(1.0));
    filterSelectivities.put("n", Double.valueOf(1.0));

    // Add the nodes to a collection for a query plan.
    nodes.add(new LogicalJoinNode("a", "b", "c1", "c1", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("b", "c", "c0", "c0", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("c", "d", "c1", "c1", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("d", "e", "c0", "c0", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("e", "f", "c1", "c1", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("f", "g", "c0", "c0", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("g", "h", "c1", "c1", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("h", "i", "c0", "c0", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("i", "j", "c1", "c1", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("j", "k", "c0", "c0", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("k", "l", "c1", "c1", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("l", "m", "c0", "c0", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("m", "n", "c1", "c1", Predicate.Op.EQUALS));
    nodes.add(new LogicalJoinNode("n", "bigTable", "c0", "c0", Predicate.Op.EQUALS));

    // Make sure we don't give the nodes to the optimizer in a nice order.
    Collections.shuffle(nodes);
    Parser p = new Parser();
    j = new JoinOptimizer(
            p.generateLogicalPlan(
                    tid,
                    "SELECT COUNT(a.c0) FROM bigTable, a, b, c, d, e, f, g, h, i, j, k, l, m, n WHERE bigTable.c0 = n.c0 AND a.c1 = b.c1 AND b.c0 = c.c0 AND c.c1 = d.c1 AND d.c0 = e.c0 AND e.c1 = f.c1 AND f.c0 = g.c0 AND g.c1 = h.c1 AND h.c0 = i.c0 AND i.c1 = j.c1 AND j.c0 = k.c0 AND k.c1 = l.c1 AND l.c0 = m.c0 AND m.c1 = n.c1;"),
            nodes);

    // Set the last boolean here to 'true' in order to have orderJoins()
    // print out its logic.
    result = j.orderJoins(stats, filterSelectivities, false);

    // If you're only re-ordering the join nodes,
    // you shouldn't end up with more than you started with.
    Assert.assertEquals(result.size(), nodes.size());

    // Make sure that "bigTable" is the outermost table in the join.
    Assert.assertEquals(result.get(result.size() - 1).t2Alias, "bigTable");
}
/**
 * Helper function to display a Swing window with a tree representation of the specified list of
 * joins. See {@link #orderJoins}, which may want to call this when the analyze flag is true.
 *
 * @param js the join plan to visualize
 * @param pc the PlanCache accumulated while building the optimal plan
 * @param stats table statistics for base tables
 * @param selectivities the selectivities of the filters over each of the tables (where tables
 *     are identified by their alias or name if no alias is given)
 */
private void printJoins(
        Vector<LogicalJoinNode> js,
        PlanCache pc,
        HashMap<String, TableStats> stats,
        HashMap<String, Double> selectivities) {
    JFrame f = new JFrame("Join Plan for " + p.getQuery());

    // Set the default close operation for the window,
    // or else the program won't exit when clicking the close button.
    f.setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE);
    f.setVisible(true);
    f.setSize(300, 500);

    HashMap<String, DefaultMutableTreeNode> m = new HashMap<String, DefaultMutableTreeNode>();
    DefaultMutableTreeNode root = null, treetop = null;
    HashSet<LogicalJoinNode> pathSoFar = new HashSet<LogicalJoinNode>();
    boolean neither;

    System.out.println(js);
    for (LogicalJoinNode j : js) {
        pathSoFar.add(j);
        System.out.println("PATH SO FAR = " + pathSoFar);

        String table1Name = Database.getCatalog().getTableName(this.p.getTableId(j.t1Alias));
        String table2Name = Database.getCatalog().getTableName(this.p.getTableId(j.t2Alias));

        neither = true;

        root = new DefaultMutableTreeNode(
                "Join " + j + " (Cost =" + pc.getCost(pathSoFar)
                        + ", card = " + pc.getCard(pathSoFar) + ")");
        DefaultMutableTreeNode n = m.get(j.t1Alias);
        if (n == null) { // never seen this table before
            n = new DefaultMutableTreeNode(
                    j.t1Alias
                            + " (Cost = " + stats.get(table1Name).estimateScanCost()
                            + ", card = "
                            + stats.get(table1Name).estimateTableCardinality(selectivities.get(j.t1Alias))
                            + ")");
            root.add(n);
        } else {
            // Make n the left child of root.
            root.add(n);
            neither = false;
        }
        m.put(j.t1Alias, root);

        n = m.get(j.t2Alias);
        if (n == null) { // never seen this table before
            n = new DefaultMutableTreeNode(
                    j.t2Alias == null
                            ? "Subplan"
                            : (j.t2Alias
                                    + " (Cost = " + stats.get(table2Name).estimateScanCost()
                                    + ", card = "
                                    + stats.get(table2Name).estimateTableCardinality(selectivities.get(j.t2Alias))
                                    + ")"));
            root.add(n);
        } else {
            // Make n the right child of root.
            root.add(n);
            neither = false;
        }
        m.put(j.t2Alias, root);

        // Unless this table doesn't join with other tables,
        // all tables are accessed from root.
        if (!neither) {
            for (String key : m.keySet()) {
                m.put(key, root);
            }
        }
        treetop = root;
    }

    JTree tree = new JTree(treetop);
    JScrollPane treeView = new JScrollPane(tree);

    tree.setShowsRootHandles(true);

    // Set the icon for leaf nodes.
    ImageIcon leafIcon = new ImageIcon("join.jpg");
    DefaultTreeCellRenderer renderer = new DefaultTreeCellRenderer();
    renderer.setOpenIcon(leafIcon);
    renderer.setClosedIcon(leafIcon);
    tree.setCellRenderer(renderer);

    f.setSize(300, 500);
    f.add(treeView);
    for (int i = 0; i < tree.getRowCount(); i++) {
        tree.expandRow(i);
    }

    if (js.size() == 0) {
        f.add(new JLabel("No joins in plan."));
    }

    f.pack();
}
public void open() throws DbException, TransactionAbortedException {
    fileIt = Database.getCatalog().getDbFile(tableid).iterator(tid);
    fileIt.open();
}