/** Insert rows through staging phase */ private void insertRowsThroughStaging( TaskAttemptContext taskAttemptContext, InsertNode insertNode, Path finalOutputPath, Path stagingDir, Path stagingResultDir) throws IOException { EvalExprExec evalExprExec = new EvalExprExec(taskAttemptContext, (EvalExprNode) insertNode.getChild()); InsertRowsExec exec = new InsertRowsExec(taskAttemptContext, insertNode, evalExprExec); try { exec.init(); exec.next(); } finally { exec.close(); } FileSystem fs = TajoConf.getWarehouseDir(context.getConf()).getFileSystem(context.getConf()); if (insertNode.isOverwrite()) { // INSERT OVERWRITE INTO // it moves the original table into the temporary location. // Then it moves the new result table into the original table location. // Upon failed, it recovers the original table if possible. boolean movedToOldTable = false; boolean committed = false; Path oldTableDir = new Path(stagingDir, TajoConstants.INSERT_OVERWIRTE_OLD_TABLE_NAME); try { if (fs.exists(finalOutputPath)) { fs.rename(finalOutputPath, oldTableDir); movedToOldTable = fs.exists(oldTableDir); } else { // if the parent does not exist, make its parent directory. fs.mkdirs(finalOutputPath.getParent()); } fs.rename(stagingResultDir, finalOutputPath); committed = fs.exists(finalOutputPath); } catch (IOException ioe) { // recover the old table if (movedToOldTable && !committed) { fs.rename(oldTableDir, finalOutputPath); } } } else { FileStatus[] files = fs.listStatus(stagingResultDir); for (FileStatus eachFile : files) { Path targetFilePath = new Path(finalOutputPath, eachFile.getPath().getName()); if (fs.exists(targetFilePath)) { targetFilePath = new Path( finalOutputPath, eachFile.getPath().getName() + "_" + System.currentTimeMillis()); } fs.rename(eachFile.getPath(), targetFilePath); } } }
@Override public void init() throws IOException { QueryContext queryContext = new QueryContext(masterContext.getConf()); currentRow = 0; MasterPlan masterPlan = new MasterPlan(queryId, queryContext, logicalPlan); GlobalPlanner globalPlanner = new GlobalPlanner(masterContext.getConf(), masterContext.getCatalog()); try { globalPlanner.build(queryContext, masterPlan); } catch (PlanningException e) { throw new RuntimeException(e); } ExecutionBlockCursor cursor = new ExecutionBlockCursor(masterPlan); ExecutionBlock leafBlock = null; for (ExecutionBlock block : cursor) { if (masterPlan.isLeaf(block)) { leafBlock = block; break; } } if (leafBlock == null) { throw new InvalidQueryException("Global planner could not find any leaf block."); } taskContext = new TaskAttemptContext( queryContext, null, new TaskAttemptId(new TaskId(leafBlock.getId(), 0), 0), null, null); physicalExec = new SimplePhysicalPlannerImpl(masterContext.getConf()) .createPlan(taskContext, leafBlock.getPlan()); tableDesc = new TableDesc( "table_" + System.currentTimeMillis(), physicalExec.getSchema(), new TableMeta("SYSTEM", new KeyValueSet()), null); outSchema = physicalExec.getSchema(); encoder = RowStoreUtil.createEncoder(getLogicalSchema()); physicalExec.init(); }
/** Insert row values */ private void insertRowValues( QueryContext queryContext, InsertNode insertNode, SubmitQueryResponse.Builder responseBuilder) { try { String nodeUniqName = insertNode.getTableName() == null ? new Path(insertNode.getUri()).getName() : insertNode.getTableName(); String queryId = nodeUniqName + "_" + System.currentTimeMillis(); URI finalOutputUri = insertNode.getUri(); Tablespace space = TablespaceManager.get(finalOutputUri).get(); TableMeta tableMeta = new TableMeta(insertNode.getStorageType(), insertNode.getOptions()); tableMeta.putOption(StorageConstants.INSERT_DIRECTLY, Boolean.TRUE.toString()); FormatProperty formatProperty = space.getFormatProperty(tableMeta); TaskAttemptContext taskAttemptContext; if (formatProperty .directInsertSupported()) { // if this format and storage supports direct insertion taskAttemptContext = new TaskAttemptContext(queryContext, null, null, null, null); taskAttemptContext.setOutputPath(new Path(finalOutputUri)); EvalExprExec evalExprExec = new EvalExprExec(taskAttemptContext, (EvalExprNode) insertNode.getChild()); InsertRowsExec exec = new InsertRowsExec(taskAttemptContext, insertNode, evalExprExec); try { exec.init(); exec.next(); } finally { exec.close(); } } else { URI stagingSpaceUri = space.prepareStagingSpace(context.getConf(), queryId, queryContext, tableMeta); Path stagingDir = new Path(stagingSpaceUri); Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME); taskAttemptContext = new TaskAttemptContext(queryContext, null, null, null, stagingDir); taskAttemptContext.setOutputPath(new Path(stagingResultDir, "part-01-000000")); insertRowsThroughStaging( taskAttemptContext, insertNode, new Path(finalOutputUri), stagingDir, stagingResultDir); } // set insert stats (how many rows and bytes) TableStats stats = new TableStats(); stats.setNumBytes(taskAttemptContext.getResultStats().getNumBytes()); stats.setNumRows(taskAttemptContext.getResultStats().getNumRows()); if (insertNode.hasTargetTable()) { CatalogProtos.UpdateTableStatsProto.Builder builder = CatalogProtos.UpdateTableStatsProto.newBuilder(); builder.setTableName(insertNode.getTableName()); builder.setStats(stats.getProto()); catalog.updateTableStats(builder.build()); TableDesc desc = new TableDesc( insertNode.getTableName(), insertNode.getTargetSchema(), tableMeta, finalOutputUri); responseBuilder.setTableDesc(desc.getProto()); } else { // If INSERT INTO LOCATION // Empty TableDesc List<CatalogProtos.ColumnProto> columns = new ArrayList<CatalogProtos.ColumnProto>(); CatalogProtos.TableDescProto tableDescProto = CatalogProtos.TableDescProto.newBuilder() .setTableName(nodeUniqName) .setMeta( CatalogProtos.TableProto.newBuilder() .setStoreType(BuiltinStorages.TEXT) .build()) .setSchema(CatalogProtos.SchemaProto.newBuilder().addAllFields(columns).build()) .setStats(stats.getProto()) .build(); responseBuilder.setTableDesc(tableDescProto); } // If queryId == NULL_QUERY_ID and MaxRowNum == -1, TajoCli prints only number of inserted // rows. responseBuilder.setMaxRowNum(-1); responseBuilder.setQueryId(QueryIdFactory.NULL_QUERY_ID.getProto()); responseBuilder.setResultType(ResultType.NO_RESULT); responseBuilder.setState(OK); } catch (Throwable t) { throw new RuntimeException(t); } }
public class TestBSTIndexExec { private TajoConf conf; private Path idxPath; private CatalogService catalog; private SQLAnalyzer analyzer; private LogicalPlanner planner; private LogicalOptimizer optimizer; private AbstractStorageManager sm; private Schema idxSchema; private TupleComparator comp; private BSTIndex.BSTIndexWriter writer; private HashMap<Integer, Integer> randomValues; private int rndKey = -1; private FileSystem fs; private TableMeta meta; private Path tablePath; private Random rnd = new Random(System.currentTimeMillis()); private TajoTestingCluster util; @Before public void setup() throws Exception { this.randomValues = new HashMap<Integer, Integer>(); this.conf = new TajoConf(); util = new TajoTestingCluster(); util.startCatalogCluster(); catalog = util.getMiniCatalogCluster().getCatalog(); Path workDir = CommonTestingUtil.getTestDir(); catalog.createTablespace(DEFAULT_TABLESPACE_NAME, workDir.toUri().toString()); catalog.createDatabase(TajoConstants.DEFAULT_DATABASE_NAME, DEFAULT_TABLESPACE_NAME); sm = StorageManagerFactory.getStorageManager(conf, workDir); idxPath = new Path(workDir, "test.idx"); Schema schema = new Schema(); schema.addColumn("managerid", Type.INT4); schema.addColumn("empid", Type.INT4); schema.addColumn("deptname", Type.TEXT); this.idxSchema = new Schema(); idxSchema.addColumn("managerid", Type.INT4); SortSpec[] sortKeys = new SortSpec[1]; sortKeys[0] = new SortSpec(idxSchema.getColumn("managerid"), true, false); this.comp = new TupleComparator(idxSchema, sortKeys); this.writer = new BSTIndex(conf) .getIndexWriter(idxPath, BSTIndex.TWO_LEVEL_INDEX, this.idxSchema, this.comp); writer.setLoadNum(100); writer.open(); long offset; meta = CatalogUtil.newTableMeta(StoreType.CSV); tablePath = StorageUtil.concatPath(workDir, "employee", "table.csv"); fs = tablePath.getFileSystem(conf); fs.mkdirs(tablePath.getParent()); FileAppender appender = (FileAppender) StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); appender.init(); Tuple tuple = new VTuple(schema.size()); for (int i = 0; i < 10000; i++) { Tuple key = new VTuple(this.idxSchema.size()); int rndKey = rnd.nextInt(250); if (this.randomValues.containsKey(rndKey)) { int t = this.randomValues.remove(rndKey) + 1; this.randomValues.put(rndKey, t); } else { this.randomValues.put(rndKey, 1); } key.put(new Datum[] {DatumFactory.createInt4(rndKey)}); tuple.put( new Datum[] { DatumFactory.createInt4(rndKey), DatumFactory.createInt4(rnd.nextInt(10)), DatumFactory.createText("dept_" + rnd.nextInt(10)) }); offset = appender.getOffset(); appender.addTuple(tuple); writer.write(key, offset); } appender.flush(); appender.close(); writer.close(); TableDesc desc = new TableDesc( CatalogUtil.buildFQName(TajoConstants.DEFAULT_DATABASE_NAME, "employee"), schema, meta, sm.getTablePath("employee")); catalog.createTable(desc); analyzer = new SQLAnalyzer(); planner = new LogicalPlanner(catalog); optimizer = new LogicalOptimizer(conf); } @After public void tearDown() { util.shutdownCatalogCluster(); } @Test public void testEqual() throws Exception { if (conf.getBoolean("tajo.storage.manager.v2", false)) { return; } this.rndKey = rnd.nextInt(250); final String QUERY = "select * from employee where managerId = " + rndKey; FileFragment[] frags = StorageManager.splitNG(conf, "default.employee", meta, tablePath, Integer.MAX_VALUE); Path workDir = CommonTestingUtil.getTestDir("target/test-data/testEqual"); TaskAttemptContext ctx = new TaskAttemptContext( conf, LocalTajoTestingUtility.newQueryUnitAttemptId(), new FileFragment[] {frags[0]}, workDir); Expr expr = analyzer.parse(QUERY); LogicalPlan plan = planner.createPlan(LocalTajoTestingUtility.createDummySession(), expr); LogicalNode rootNode = optimizer.optimize(plan); TmpPlanner phyPlanner = new TmpPlanner(conf, sm); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); int tupleCount = this.randomValues.get(rndKey); int counter = 0; exec.init(); while (exec.next() != null) { counter++; } exec.close(); assertEquals(tupleCount, counter); } private class TmpPlanner extends PhysicalPlannerImpl { public TmpPlanner(TajoConf conf, AbstractStorageManager sm) { super(conf, sm); } @Override public PhysicalExec createScanPlan( TaskAttemptContext ctx, ScanNode scanNode, Stack<LogicalNode> stack) throws IOException { Preconditions.checkNotNull( ctx.getTable(scanNode.getTableName()), "Error: There is no table matched to %s", scanNode.getTableName()); List<FileFragment> fragments = FragmentConvertor.convert( ctx.getConf(), meta.getStoreType(), ctx.getTables(scanNode.getTableName())); Datum[] datum = new Datum[] {DatumFactory.createInt4(rndKey)}; return new BSTIndexScanExec( ctx, sm, scanNode, fragments.get(0), idxPath, idxSchema, comp, datum); } } }
public class TestExternalSortExec { private TajoConf conf; private TajoTestingCluster util; private final String TEST_PATH = "target/test-data/TestExternalSortExec"; private CatalogService catalog; private SQLAnalyzer analyzer; private LogicalPlanner planner; private AbstractStorageManager sm; private Path testDir; private final int numTuple = 100000; private Random rnd = new Random(System.currentTimeMillis()); private TableDesc employee; @Before public void setUp() throws Exception { this.conf = new TajoConf(); util = new TajoTestingCluster(); catalog = util.startCatalogCluster().getCatalog(); testDir = CommonTestingUtil.getTestDir(TEST_PATH); conf.setVar(TajoConf.ConfVars.WORKER_TEMPORAL_DIR, testDir.toString()); sm = StorageManagerFactory.getStorageManager(conf, testDir); Schema schema = new Schema(); schema.addColumn("managerId", Type.INT4); schema.addColumn("empId", Type.INT4); schema.addColumn("deptName", Type.TEXT); TableMeta employeeMeta = CatalogUtil.newTableMeta(StoreType.CSV); Path employeePath = new Path(testDir, "employee.csv"); Appender appender = StorageManagerFactory.getStorageManager(conf) .getAppender(employeeMeta, schema, employeePath); appender.enableStats(); appender.init(); Tuple tuple = new VTuple(schema.getColumnNum()); for (int i = 0; i < numTuple; i++) { tuple.put( new Datum[] { DatumFactory.createInt4(rnd.nextInt(50)), DatumFactory.createInt4(rnd.nextInt(100)), DatumFactory.createText("dept_" + i), }); appender.addTuple(tuple); } appender.flush(); appender.close(); System.out.println( appender.getStats().getNumRows() + " rows (" + (appender.getStats().getNumBytes() / 1048576) + " MB)"); employee = new TableDesc("employee", schema, employeeMeta, employeePath); catalog.addTable(employee); analyzer = new SQLAnalyzer(); planner = new LogicalPlanner(catalog); } @After public void tearDown() throws Exception { CommonTestingUtil.cleanupTestDir(TEST_PATH); util.shutdownCatalogCluster(); } String[] QUERIES = {"select managerId, empId from employee order by managerId, empId"}; @Test public final void testNext() throws IOException, PlanningException { FileFragment[] frags = StorageManager.splitNG( conf, "employee", employee.getMeta(), employee.getPath(), Integer.MAX_VALUE); Path workDir = new Path(testDir, TestExternalSortExec.class.getName()); TaskAttemptContext ctx = new TaskAttemptContext( conf, LocalTajoTestingUtility.newQueryUnitAttemptId(), new FileFragment[] {frags[0]}, workDir); ctx.setEnforcer(new Enforcer()); Expr expr = analyzer.parse(QUERIES[0]); LogicalPlan plan = planner.createPlan(expr); LogicalNode rootNode = plan.getRootBlock().getRoot(); PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, sm); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); ProjectionExec proj = (ProjectionExec) exec; // TODO - should be planed with user's optimization hint if (!(proj.getChild() instanceof ExternalSortExec)) { UnaryPhysicalExec sortExec = proj.getChild(); SeqScanExec scan = sortExec.getChild(); ExternalSortExec extSort = new ExternalSortExec(ctx, sm, ((MemSortExec) sortExec).getPlan(), scan); proj.setChild(extSort); } Tuple tuple; Tuple preVal = null; Tuple curVal; int cnt = 0; exec.init(); long start = System.currentTimeMillis(); TupleComparator comparator = new TupleComparator( proj.getSchema(), new SortSpec[] { new SortSpec(new Column("managerId", Type.INT4)), new SortSpec(new Column("empId", Type.INT4)) }); while ((tuple = exec.next()) != null) { curVal = tuple; if (preVal != null) { assertTrue( "prev: " + preVal + ", but cur: " + curVal, comparator.compare(preVal, curVal) <= 0); } preVal = curVal; cnt++; } long end = System.currentTimeMillis(); assertEquals(numTuple, cnt); // for rescan test preVal = null; exec.rescan(); cnt = 0; while ((tuple = exec.next()) != null) { curVal = tuple; if (preVal != null) { assertTrue( "prev: " + preVal + ", but cur: " + curVal, comparator.compare(preVal, curVal) <= 0); } preVal = curVal; cnt++; } assertEquals(numTuple, cnt); exec.close(); System.out.println("Sort Time: " + (end - start) + " msc"); } }
@Test public final void testNext() throws IOException, PlanningException { FileFragment[] frags = StorageManager.splitNG( conf, "employee", employee.getMeta(), employee.getPath(), Integer.MAX_VALUE); Path workDir = new Path(testDir, TestExternalSortExec.class.getName()); TaskAttemptContext ctx = new TaskAttemptContext( conf, LocalTajoTestingUtility.newQueryUnitAttemptId(), new FileFragment[] {frags[0]}, workDir); ctx.setEnforcer(new Enforcer()); Expr expr = analyzer.parse(QUERIES[0]); LogicalPlan plan = planner.createPlan(expr); LogicalNode rootNode = plan.getRootBlock().getRoot(); PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, sm); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); ProjectionExec proj = (ProjectionExec) exec; // TODO - should be planed with user's optimization hint if (!(proj.getChild() instanceof ExternalSortExec)) { UnaryPhysicalExec sortExec = proj.getChild(); SeqScanExec scan = sortExec.getChild(); ExternalSortExec extSort = new ExternalSortExec(ctx, sm, ((MemSortExec) sortExec).getPlan(), scan); proj.setChild(extSort); } Tuple tuple; Tuple preVal = null; Tuple curVal; int cnt = 0; exec.init(); long start = System.currentTimeMillis(); TupleComparator comparator = new TupleComparator( proj.getSchema(), new SortSpec[] { new SortSpec(new Column("managerId", Type.INT4)), new SortSpec(new Column("empId", Type.INT4)) }); while ((tuple = exec.next()) != null) { curVal = tuple; if (preVal != null) { assertTrue( "prev: " + preVal + ", but cur: " + curVal, comparator.compare(preVal, curVal) <= 0); } preVal = curVal; cnt++; } long end = System.currentTimeMillis(); assertEquals(numTuple, cnt); // for rescan test preVal = null; exec.rescan(); cnt = 0; while ((tuple = exec.next()) != null) { curVal = tuple; if (preVal != null) { assertTrue( "prev: " + preVal + ", but cur: " + curVal, comparator.compare(preVal, curVal) <= 0); } preVal = curVal; cnt++; } assertEquals(numTuple, cnt); exec.close(); System.out.println("Sort Time: " + (end - start) + " msc"); }