Example #1
0
  /** Insert rows through staging phase */
  private void insertRowsThroughStaging(
      TaskAttemptContext taskAttemptContext,
      InsertNode insertNode,
      Path finalOutputPath,
      Path stagingDir,
      Path stagingResultDir)
      throws IOException {

    EvalExprExec evalExprExec =
        new EvalExprExec(taskAttemptContext, (EvalExprNode) insertNode.getChild());
    InsertRowsExec exec = new InsertRowsExec(taskAttemptContext, insertNode, evalExprExec);

    try {
      exec.init();
      exec.next();
    } finally {
      exec.close();
    }

    FileSystem fs = TajoConf.getWarehouseDir(context.getConf()).getFileSystem(context.getConf());

    if (insertNode.isOverwrite()) { // INSERT OVERWRITE INTO
      // It moves the original table into a temporary location,
      // then moves the new result table into the original table location.
      // Upon failure, it restores the original table if possible.
      boolean movedToOldTable = false;
      boolean committed = false;
      Path oldTableDir = new Path(stagingDir, TajoConstants.INSERT_OVERWIRTE_OLD_TABLE_NAME);
      try {
        if (fs.exists(finalOutputPath)) {
          fs.rename(finalOutputPath, oldTableDir);
          movedToOldTable = fs.exists(oldTableDir);
        } else { // if the final output path does not exist, create its parent directory.
          fs.mkdirs(finalOutputPath.getParent());
        }
        fs.rename(stagingResultDir, finalOutputPath);
        committed = fs.exists(finalOutputPath);
      } catch (IOException ioe) {
        // recover the old table
        if (movedToOldTable && !committed) {
          fs.rename(oldTableDir, finalOutputPath);
        }
      }
    } else {
      FileStatus[] files = fs.listStatus(stagingResultDir);
      for (FileStatus eachFile : files) {
        Path targetFilePath = new Path(finalOutputPath, eachFile.getPath().getName());
        if (fs.exists(targetFilePath)) {
          targetFilePath =
              new Path(
                  finalOutputPath, eachFile.getPath().getName() + "_" + System.currentTimeMillis());
        }
        fs.rename(eachFile.getPath(), targetFilePath);
      }
    }
  }
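
The INSERT OVERWRITE branch above is a rename-based swap: the current table is parked in a backup directory, the staged result is renamed into place, and the backup is restored if the promotion does not land. Below is a minimal standalone sketch of that pattern against the Hadoop FileSystem API; the class and method names are illustrative, not part of Tajo, and unlike the method above this version rethrows the failure after rolling back.

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Illustrative helper, not part of Tajo.
class OverwriteSwapSketch {
  static void swap(FileSystem fs, Path staged, Path finalOutput, Path backupDir)
      throws IOException {
    boolean movedToBackup = false;
    boolean committed = false;
    try {
      if (fs.exists(finalOutput)) {
        fs.rename(finalOutput, backupDir); // park the current table contents
        movedToBackup = fs.exists(backupDir);
      } else {
        fs.mkdirs(finalOutput.getParent()); // first write: make sure the parent exists
      }
      fs.rename(staged, finalOutput); // promote the staged result
      committed = fs.exists(finalOutput);
    } catch (IOException e) {
      if (movedToBackup && !committed) {
        fs.rename(backupDir, finalOutput); // best-effort restore of the original table
      }
      throw e;
    }
  }
}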
Example #2
0
  @Override
  public void init() throws IOException {
    QueryContext queryContext = new QueryContext(masterContext.getConf());
    currentRow = 0;

    MasterPlan masterPlan = new MasterPlan(queryId, queryContext, logicalPlan);
    GlobalPlanner globalPlanner =
        new GlobalPlanner(masterContext.getConf(), masterContext.getCatalog());
    try {
      globalPlanner.build(queryContext, masterPlan);
    } catch (PlanningException e) {
      throw new RuntimeException(e);
    }

    ExecutionBlockCursor cursor = new ExecutionBlockCursor(masterPlan);
    ExecutionBlock leafBlock = null;
    for (ExecutionBlock block : cursor) {
      if (masterPlan.isLeaf(block)) {
        leafBlock = block;
        break;
      }
    }

    if (leafBlock == null) {
      throw new InvalidQueryException("Global planner could not find any leaf block.");
    }

    taskContext =
        new TaskAttemptContext(
            queryContext, null, new TaskAttemptId(new TaskId(leafBlock.getId(), 0), 0), null, null);
    physicalExec =
        new SimplePhysicalPlannerImpl(masterContext.getConf())
            .createPlan(taskContext, leafBlock.getPlan());

    tableDesc =
        new TableDesc(
            "table_" + System.currentTimeMillis(),
            physicalExec.getSchema(),
            new TableMeta("SYSTEM", new KeyValueSet()),
            null);
    outSchema = physicalExec.getSchema();
    encoder = RowStoreUtil.createEncoder(getLogicalSchema());

    physicalExec.init();
  }
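
init() above wires a leaf ExecutionBlock into a PhysicalExec that is then driven as a pull-based iterator. A hedged sketch of the usual drive loop follows, assuming the init()/next()/close() contract used throughout these examples; consumeRow is a hypothetical placeholder for whatever the caller does with each row.

  // Sketch only: drain a PhysicalExec; consumeRow is a hypothetical callback.
  static void drain(PhysicalExec exec) throws IOException {
    exec.init();
    try {
      Tuple tuple;
      while ((tuple = exec.next()) != null) { // next() returns null at end of input
        consumeRow(tuple); // e.g. encode the row with a RowStoreUtil encoder
      }
    } finally {
      exec.close(); // always release scanner and appender resources
    }
  }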
Example #3
0
  /** Insert row values */
  private void insertRowValues(
      QueryContext queryContext,
      InsertNode insertNode,
      SubmitQueryResponse.Builder responseBuilder) {
    try {
      String nodeUniqName =
          insertNode.getTableName() == null
              ? new Path(insertNode.getUri()).getName()
              : insertNode.getTableName();
      String queryId = nodeUniqName + "_" + System.currentTimeMillis();

      URI finalOutputUri = insertNode.getUri();
      Tablespace space = TablespaceManager.get(finalOutputUri).get();
      TableMeta tableMeta = new TableMeta(insertNode.getStorageType(), insertNode.getOptions());
      tableMeta.putOption(StorageConstants.INSERT_DIRECTLY, Boolean.TRUE.toString());

      FormatProperty formatProperty = space.getFormatProperty(tableMeta);

      TaskAttemptContext taskAttemptContext;
      // if this format and storage support direct insertion
      if (formatProperty.directInsertSupported()) {
        taskAttemptContext = new TaskAttemptContext(queryContext, null, null, null, null);
        taskAttemptContext.setOutputPath(new Path(finalOutputUri));

        EvalExprExec evalExprExec =
            new EvalExprExec(taskAttemptContext, (EvalExprNode) insertNode.getChild());
        InsertRowsExec exec = new InsertRowsExec(taskAttemptContext, insertNode, evalExprExec);

        try {
          exec.init();
          exec.next();
        } finally {
          exec.close();
        }
      } else {
        URI stagingSpaceUri =
            space.prepareStagingSpace(context.getConf(), queryId, queryContext, tableMeta);
        Path stagingDir = new Path(stagingSpaceUri);
        Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);

        taskAttemptContext = new TaskAttemptContext(queryContext, null, null, null, stagingDir);
        taskAttemptContext.setOutputPath(new Path(stagingResultDir, "part-01-000000"));
        insertRowsThroughStaging(
            taskAttemptContext, insertNode, new Path(finalOutputUri), stagingDir, stagingResultDir);
      }

      // set insert stats (how many rows and bytes)
      TableStats stats = new TableStats();
      stats.setNumBytes(taskAttemptContext.getResultStats().getNumBytes());
      stats.setNumRows(taskAttemptContext.getResultStats().getNumRows());

      if (insertNode.hasTargetTable()) {
        CatalogProtos.UpdateTableStatsProto.Builder builder =
            CatalogProtos.UpdateTableStatsProto.newBuilder();
        builder.setTableName(insertNode.getTableName());
        builder.setStats(stats.getProto());

        catalog.updateTableStats(builder.build());

        TableDesc desc =
            new TableDesc(
                insertNode.getTableName(), insertNode.getTargetSchema(), tableMeta, finalOutputUri);
        responseBuilder.setTableDesc(desc.getProto());

      } else { // If INSERT INTO LOCATION

        // Empty TableDesc
        List<CatalogProtos.ColumnProto> columns = new ArrayList<CatalogProtos.ColumnProto>();
        CatalogProtos.TableDescProto tableDescProto =
            CatalogProtos.TableDescProto.newBuilder()
                .setTableName(nodeUniqName)
                .setMeta(
                    CatalogProtos.TableProto.newBuilder()
                        .setStoreType(BuiltinStorages.TEXT)
                        .build())
                .setSchema(CatalogProtos.SchemaProto.newBuilder().addAllFields(columns).build())
                .setStats(stats.getProto())
                .build();

        responseBuilder.setTableDesc(tableDescProto);
      }

      // If queryId == NULL_QUERY_ID and MaxRowNum == -1, TajoCli prints only the number of
      // inserted rows.
      responseBuilder.setMaxRowNum(-1);
      responseBuilder.setQueryId(QueryIdFactory.NULL_QUERY_ID.getProto());
      responseBuilder.setResultType(ResultType.NO_RESULT);
      responseBuilder.setState(OK);
    } catch (Throwable t) {
      throw new RuntimeException(t);
    }
  }
public class TestBSTIndexExec {

  private TajoConf conf;
  private Path idxPath;
  private CatalogService catalog;
  private SQLAnalyzer analyzer;
  private LogicalPlanner planner;
  private LogicalOptimizer optimizer;
  private AbstractStorageManager sm;
  private Schema idxSchema;
  private TupleComparator comp;
  private BSTIndex.BSTIndexWriter writer;
  private HashMap<Integer, Integer> randomValues;
  private int rndKey = -1;
  private FileSystem fs;
  private TableMeta meta;
  private Path tablePath;

  private Random rnd = new Random(System.currentTimeMillis());

  private TajoTestingCluster util;

  @Before
  public void setup() throws Exception {
    this.randomValues = new HashMap<Integer, Integer>();
    this.conf = new TajoConf();
    util = new TajoTestingCluster();
    util.startCatalogCluster();
    catalog = util.getMiniCatalogCluster().getCatalog();

    Path workDir = CommonTestingUtil.getTestDir();
    catalog.createTablespace(DEFAULT_TABLESPACE_NAME, workDir.toUri().toString());
    catalog.createDatabase(TajoConstants.DEFAULT_DATABASE_NAME, DEFAULT_TABLESPACE_NAME);
    sm = StorageManagerFactory.getStorageManager(conf, workDir);

    idxPath = new Path(workDir, "test.idx");

    Schema schema = new Schema();
    schema.addColumn("managerid", Type.INT4);
    schema.addColumn("empid", Type.INT4);
    schema.addColumn("deptname", Type.TEXT);

    this.idxSchema = new Schema();
    idxSchema.addColumn("managerid", Type.INT4);
    SortSpec[] sortKeys = new SortSpec[1];
    sortKeys[0] = new SortSpec(idxSchema.getColumn("managerid"), true, false);
    this.comp = new TupleComparator(idxSchema, sortKeys);

    this.writer =
        new BSTIndex(conf)
            .getIndexWriter(idxPath, BSTIndex.TWO_LEVEL_INDEX, this.idxSchema, this.comp);
    writer.setLoadNum(100);
    writer.open();
    long offset;

    meta = CatalogUtil.newTableMeta(StoreType.CSV);
    tablePath = StorageUtil.concatPath(workDir, "employee", "table.csv");
    fs = tablePath.getFileSystem(conf);
    fs.mkdirs(tablePath.getParent());

    FileAppender appender =
        (FileAppender)
            StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath);
    appender.init();
    Tuple tuple = new VTuple(schema.size());
    for (int i = 0; i < 10000; i++) {

      Tuple key = new VTuple(this.idxSchema.size());
      int rndKey = rnd.nextInt(250);
      if (this.randomValues.containsKey(rndKey)) {
        int t = this.randomValues.remove(rndKey) + 1;
        this.randomValues.put(rndKey, t);
      } else {
        this.randomValues.put(rndKey, 1);
      }

      key.put(new Datum[] {DatumFactory.createInt4(rndKey)});
      tuple.put(
          new Datum[] {
            DatumFactory.createInt4(rndKey),
            DatumFactory.createInt4(rnd.nextInt(10)),
            DatumFactory.createText("dept_" + rnd.nextInt(10))
          });
      offset = appender.getOffset();
      appender.addTuple(tuple);
      writer.write(key, offset);
    }
    appender.flush();
    appender.close();
    writer.close();

    TableDesc desc =
        new TableDesc(
            CatalogUtil.buildFQName(TajoConstants.DEFAULT_DATABASE_NAME, "employee"),
            schema,
            meta,
            sm.getTablePath("employee"));
    catalog.createTable(desc);

    analyzer = new SQLAnalyzer();
    planner = new LogicalPlanner(catalog);
    optimizer = new LogicalOptimizer(conf);
  }

  @After
  public void tearDown() {
    util.shutdownCatalogCluster();
  }

  @Test
  public void testEqual() throws Exception {
    if (conf.getBoolean("tajo.storage.manager.v2", false)) {
      return;
    }
    this.rndKey = rnd.nextInt(250);
    final String QUERY = "select * from employee where managerId = " + rndKey;

    FileFragment[] frags =
        StorageManager.splitNG(conf, "default.employee", meta, tablePath, Integer.MAX_VALUE);
    Path workDir = CommonTestingUtil.getTestDir("target/test-data/testEqual");
    TaskAttemptContext ctx =
        new TaskAttemptContext(
            conf,
            LocalTajoTestingUtility.newQueryUnitAttemptId(),
            new FileFragment[] {frags[0]},
            workDir);
    Expr expr = analyzer.parse(QUERY);
    LogicalPlan plan = planner.createPlan(LocalTajoTestingUtility.createDummySession(), expr);
    LogicalNode rootNode = optimizer.optimize(plan);

    TmpPlanner phyPlanner = new TmpPlanner(conf, sm);
    PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode);

    int tupleCount = this.randomValues.get(rndKey);
    int counter = 0;
    exec.init();
    while (exec.next() != null) {
      counter++;
    }
    exec.close();
    assertEquals(tupleCount, counter);
  }

  private class TmpPlanner extends PhysicalPlannerImpl {
    public TmpPlanner(TajoConf conf, AbstractStorageManager sm) {
      super(conf, sm);
    }

    @Override
    public PhysicalExec createScanPlan(
        TaskAttemptContext ctx, ScanNode scanNode, Stack<LogicalNode> stack) throws IOException {
      Preconditions.checkNotNull(
          ctx.getTable(scanNode.getTableName()),
          "Error: There is no table matched to %s",
          scanNode.getTableName());

      List<FileFragment> fragments =
          FragmentConvertor.convert(
              ctx.getConf(), meta.getStoreType(), ctx.getTables(scanNode.getTableName()));

      Datum[] datum = new Datum[] {DatumFactory.createInt4(rndKey)};

      return new BSTIndexScanExec(
          ctx, sm, scanNode, fragments.get(0), idxPath, idxSchema, comp, datum);
    }
  }
}
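
TestBSTIndexExec.setup() above keeps a frequency map (randomValues) of the generated keys so that testEqual can predict exactly how many rows an index scan on a random key must return. The same bookkeeping in plain Java, as a self-contained illustration (the class name is made up):

import java.util.HashMap;
import java.util.Map;
import java.util.Random;

class KeyFrequencySketch {
  public static void main(String[] args) {
    Random rnd = new Random();
    Map<Integer, Integer> frequency = new HashMap<>();
    for (int i = 0; i < 10000; i++) {
      int key = rnd.nextInt(250); // same key space as the test
      frequency.merge(key, 1, Integer::sum); // count rows per key
    }
    int probe = rnd.nextInt(250);
    // Expected number of matches for "WHERE managerid = probe".
    System.out.println("expected matches for key " + probe + ": " + frequency.get(probe));
  }
}

With 10,000 draws over only 250 distinct keys, every key is virtually certain to appear, which is why testEqual can look its probe key up without a null check.
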
public class TestExternalSortExec {
  private TajoConf conf;
  private TajoTestingCluster util;
  private final String TEST_PATH = "target/test-data/TestExternalSortExec";
  private CatalogService catalog;
  private SQLAnalyzer analyzer;
  private LogicalPlanner planner;
  private AbstractStorageManager sm;
  private Path testDir;

  private final int numTuple = 100000;
  private Random rnd = new Random(System.currentTimeMillis());

  private TableDesc employee;

  @Before
  public void setUp() throws Exception {
    this.conf = new TajoConf();
    util = new TajoTestingCluster();
    catalog = util.startCatalogCluster().getCatalog();
    testDir = CommonTestingUtil.getTestDir(TEST_PATH);
    conf.setVar(TajoConf.ConfVars.WORKER_TEMPORAL_DIR, testDir.toString());
    sm = StorageManagerFactory.getStorageManager(conf, testDir);

    Schema schema = new Schema();
    schema.addColumn("managerId", Type.INT4);
    schema.addColumn("empId", Type.INT4);
    schema.addColumn("deptName", Type.TEXT);

    TableMeta employeeMeta = CatalogUtil.newTableMeta(StoreType.CSV);
    Path employeePath = new Path(testDir, "employee.csv");
    Appender appender =
        StorageManagerFactory.getStorageManager(conf)
            .getAppender(employeeMeta, schema, employeePath);
    appender.enableStats();
    appender.init();
    Tuple tuple = new VTuple(schema.getColumnNum());
    for (int i = 0; i < numTuple; i++) {
      tuple.put(
          new Datum[] {
            DatumFactory.createInt4(rnd.nextInt(50)),
            DatumFactory.createInt4(rnd.nextInt(100)),
            DatumFactory.createText("dept_" + i),
          });
      appender.addTuple(tuple);
    }
    appender.flush();
    appender.close();

    System.out.println(
        appender.getStats().getNumRows()
            + " rows ("
            + (appender.getStats().getNumBytes() / 1048576)
            + " MB)");

    employee = new TableDesc("employee", schema, employeeMeta, employeePath);
    catalog.addTable(employee);
    analyzer = new SQLAnalyzer();
    planner = new LogicalPlanner(catalog);
  }

  @After
  public void tearDown() throws Exception {
    CommonTestingUtil.cleanupTestDir(TEST_PATH);
    util.shutdownCatalogCluster();
  }

  String[] QUERIES = {"select managerId, empId from employee order by managerId, empId"};

  @Test
  public final void testNext() throws IOException, PlanningException {
    FileFragment[] frags =
        StorageManager.splitNG(
            conf, "employee", employee.getMeta(), employee.getPath(), Integer.MAX_VALUE);
    Path workDir = new Path(testDir, TestExternalSortExec.class.getName());
    TaskAttemptContext ctx =
        new TaskAttemptContext(
            conf,
            LocalTajoTestingUtility.newQueryUnitAttemptId(),
            new FileFragment[] {frags[0]},
            workDir);
    ctx.setEnforcer(new Enforcer());
    Expr expr = analyzer.parse(QUERIES[0]);
    LogicalPlan plan = planner.createPlan(expr);
    LogicalNode rootNode = plan.getRootBlock().getRoot();

    PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, sm);
    PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode);

    ProjectionExec proj = (ProjectionExec) exec;

    // TODO - should be planned with the user's optimization hint
    if (!(proj.getChild() instanceof ExternalSortExec)) {
      UnaryPhysicalExec sortExec = proj.getChild();
      SeqScanExec scan = sortExec.getChild();

      ExternalSortExec extSort =
          new ExternalSortExec(ctx, sm, ((MemSortExec) sortExec).getPlan(), scan);
      proj.setChild(extSort);
    }

    Tuple tuple;
    Tuple preVal = null;
    Tuple curVal;
    int cnt = 0;
    exec.init();
    long start = System.currentTimeMillis();
    TupleComparator comparator =
        new TupleComparator(
            proj.getSchema(),
            new SortSpec[] {
              new SortSpec(new Column("managerId", Type.INT4)),
              new SortSpec(new Column("empId", Type.INT4))
            });

    while ((tuple = exec.next()) != null) {
      curVal = tuple;
      if (preVal != null) {
        assertTrue(
            "prev: " + preVal + ", but cur: " + curVal, comparator.compare(preVal, curVal) <= 0);
      }
      preVal = curVal;
      cnt++;
    }
    long end = System.currentTimeMillis();
    assertEquals(numTuple, cnt);

    // for rescan test
    preVal = null;
    exec.rescan();
    cnt = 0;
    while ((tuple = exec.next()) != null) {
      curVal = tuple;
      if (preVal != null) {
        assertTrue(
            "prev: " + preVal + ", but cur: " + curVal, comparator.compare(preVal, curVal) <= 0);
      }
      preVal = curVal;
      cnt++;
    }
    assertEquals(numTuple, cnt);
    exec.close();
    System.out.println("Sort Time: " + (end - start) + " msc");
  }
}
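
TestExternalSortExec.testNext() above verifies the sort by comparing every tuple with its predecessor under the same TupleComparator, then rescans the executor and repeats the check, asserting the row count against numTuple both times. The core check in isolation, as a generic plain-Java sketch (names are illustrative):

import java.util.Comparator;
import java.util.Iterator;

final class SortednessCheck {
  // Returns true if the iterator yields elements in non-decreasing order under cmp.
  static <T> boolean isNonDecreasing(Iterator<T> it, Comparator<? super T> cmp) {
    T prev = null;
    while (it.hasNext()) {
      T cur = it.next();
      if (prev != null && cmp.compare(prev, cur) > 0) {
        return false; // found an out-of-order pair
      }
      prev = cur;
    }
    return true;
  }
}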