Example #1
  private boolean isClonedTable(String originalTable, String newTable) throws Exception {
    assertTableExists(newTable);
    TableDesc origTableDesc = client.getTableDesc(originalTable);
    TableDesc newTableDesc = client.getTableDesc(newTable);

    if (!isClonedSchema(origTableDesc.getSchema(), newTableDesc.getSchema())) {
      fail("Schema of input tables do not match");
      return false;
    }

    // Check partition information
    PartitionMethodDesc origPartMethod = origTableDesc.getPartitionMethod();
    PartitionMethodDesc newPartMethod = newTableDesc.getPartitionMethod();
    if (origPartMethod != null) {
      if (newPartMethod == null) {
        fail("New table does not have partition info");
        return false;
      }
      if (!isClonedSchema(
          origPartMethod.getExpressionSchema(), newPartMethod.getExpressionSchema())) {
        fail("Partition columns of input tables do not match");
        return false;
      }

      if (!origPartMethod.getPartitionType().equals(newPartMethod.getPartitionType())) {
        fail("Partition type of input tables do not match");
        return false;
      }
    }

    // Check external flag
    if (origTableDesc.isExternal() != newTableDesc.isExternal()) {
      fail("External table flag on input tables not equal");
      return false;
    }

    if (origTableDesc.getMeta() != null) {
      TableMeta origMeta = origTableDesc.getMeta();
      TableMeta newMeta = newTableDesc.getMeta();
      if (newMeta == null) {
        fail("New table does not have table meta");
        return false;
      }
      if (!origMeta.getDataFormat().equals(newMeta.getDataFormat())) {
        fail("Store type of input tables not equal");
        return false;
      }

      KeyValueSet origOptions = origMeta.getPropertySet();
      KeyValueSet newOptions = newMeta.getPropertySet();
      if (!origOptions.equals(newOptions)) {
        fail("Meta options of input tables not equal");
        return false;
      }
    }
    return true;
  }
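  // A hypothetical usage sketch of the helper above. The table names, the CREATE TABLE ... LIKE
  // statement, and the executeString() helper are assumptions for illustration only; they are
  // not part of the example above.
  @Test
  public void testCreateTableLikeProducesClone() throws Exception {
    executeString("CREATE TABLE clone_table LIKE orig_table").close(); // assumed SQL helper
    assertTrue(isClonedTable("orig_table", "clone_table"));
  }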
Example #2
  @Test
  public final void testCtasWithOptions() throws Exception {
    ResultSet res = executeFile("CtasWithOptions.sql");
    res.close();

    ResultSet res2 = executeQuery();
    resultSetToString(res2);
    // Read the result table name from the metadata before closing the ResultSet.
    String resultTableName = res2.getMetaData().getTableName(1);
    res2.close();

    TableDesc desc = client.getTableDesc(CatalogUtil.normalizeIdentifier(resultTableName));
    assertNotNull(desc);
    assertTrue("CSV".equalsIgnoreCase(desc.getMeta().getStoreType()));

    KeyValueSet options = desc.getMeta().getOptions();
    assertNotNull(options);
    assertEquals(
        StringEscapeUtils.escapeJava("\u0001"), options.get(StorageConstants.TEXT_DELIMITER));
  }
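  // The last assertion above depends on StringEscapeUtils.escapeJava turning the control
  // character \u0001 into the six-character literal text "\u0001". Below is a minimal,
  // self-contained sketch of that round trip; it only assumes the Apache Commons Lang
  // dependency already used by the test above (shown with the commons-lang 2.x package;
  // adjust to commons-lang3 if that is what the project uses), nothing Tajo-specific.
  public class EscapeJavaSketch {
    public static void main(String[] args) {
      String delimiter = "\u0001";                             // the actual control character
      String escaped = org.apache.commons.lang.StringEscapeUtils.escapeJava(delimiter);
      System.out.println(escaped);                             // prints: \u0001 (six characters)
      // unescapeJava reverses the transformation:
      System.out.println(
          org.apache.commons.lang.StringEscapeUtils.unescapeJava(escaped).equals(delimiter)); // true
    }
  }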
Example #3
  @Test
  public final void testRightOuter_MergeJoin3() throws IOException, TajoException {
    Expr expr = analyzer.parse(QUERIES[3]);
    LogicalNode plan = planner.createPlan(defaultContext, expr).getRootBlock().getRoot();
    JoinNode joinNode = PlannerUtil.findTopNode(plan, NodeType.JOIN);
    Enforcer enforcer = new Enforcer();
    enforcer.enforceJoinAlgorithm(joinNode.getPID(), JoinAlgorithm.MERGE_JOIN);

    FileFragment[] emp3Frags =
        FileTablespace.splitNG(
            conf, EMP3_NAME, emp3.getMeta(), new Path(emp3.getUri()), Integer.MAX_VALUE);
    FileFragment[] dep4Frags =
        FileTablespace.splitNG(
            conf, DEP4_NAME, dep4.getMeta(), new Path(dep4.getUri()), Integer.MAX_VALUE);
    FileFragment[] merged = TUtil.concat(emp3Frags, dep4Frags);

    Path workDir =
        CommonTestingUtil.getTestDir(
            TajoTestingCluster.DEFAULT_TEST_DIRECTORY + "/testRightOuter_MergeJoin3");
    TaskAttemptContext ctx =
        new TaskAttemptContext(
            new QueryContext(conf), LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir);
    ctx.setEnforcer(enforcer);

    PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf);
    PhysicalExec exec = phyPlanner.createPlan(ctx, plan);

    ProjectionExec proj = (ProjectionExec) exec;
    assertTrue(proj.getChild() instanceof RightOuterMergeJoinExec);

    int count = 0;
    exec.init();

    while (exec.next() != null) {
      // TODO check contents
      count = count + 1;
    }
    assertNull(exec.next());
    exec.close();
    assertEquals(13, count);
  }
Example #4
  @Test
  public void testAlterTable() throws Exception {

    // CREATE_TABLE
    TableDesc tableRenameTestDesc = createMockupTable("default", "mycooltable");
    catalog.createTable(tableRenameTestDesc);

    // RENAME_TABLE
    catalog.alterTable(createMockAlterTableName());
    assertTrue(catalog.existsTable("default", "mynewcooltable"));

    // RENAME_COLUMN
    catalog.alterTable(createMockAlterTableRenameColumn());
    TableDesc columnRenameDesc = catalog.getTableDesc("default", "mynewcooltable");
    assertTrue(columnRenameDesc.getSchema().containsByName("ren" + FieldName1));

    // ADD_COLUMN
    catalog.alterTable(createMockAlterTableAddColumn());
    TableDesc addColumnDesc = catalog.getTableDesc("default", "mynewcooltable");
    assertTrue(addColumnDesc.getSchema().containsByName("mynewcol"));

    // SET_PROPERTY
    TableDesc setPropertyDesc = catalog.getTableDesc("default", "mynewcooltable");
    KeyValueSet options = new KeyValueSet();
    options.set("timezone", "GMT+9"); // Seoul, Korea
    setPropertyDesc.setMeta(new TableMeta("TEXT", options));
    String prevTimeZone = setPropertyDesc.getMeta().getProperty("timezone");
    String newTimeZone = "GMT-7"; // Silicon Valley, California
    catalog.alterTable(createMockAlterTableSetProperty(newTimeZone));
    setPropertyDesc = catalog.getTableDesc("default", "mynewcooltable");
    assertNotEquals(prevTimeZone, setPropertyDesc.getMeta().getProperty("timezone"));
    assertEquals(newTimeZone, setPropertyDesc.getMeta().getProperty("timezone"));

    // UNSET_PROPERTY
    catalog.alterTable(createMockAlterTableUnsetProperty(Sets.newHashSet("dummy")));
    setPropertyDesc = catalog.getTableDesc("default", "mynewcooltable");
    assertTrue(setPropertyDesc.getMeta().getPropertySet().containsKey("timezone"));
    assertFalse(setPropertyDesc.getMeta().getPropertySet().containsKey("dummy"));
  }
Example #5
  @Test
  public final void testCtasWithTextFile() throws Exception {
    ResultSet res = executeFile("CtasWithTextFile.sql");
    res.close();

    ResultSet res2 = executeQuery();
    resultSetToString(res2);
    // Read the result table name from the metadata before closing the ResultSet.
    String resultTableName = res2.getMetaData().getTableName(1);
    res2.close();

    TableDesc desc = client.getTableDesc(CatalogUtil.normalizeIdentifier(resultTableName));
    assertNotNull(desc);
    assertTrue("TEXT".equalsIgnoreCase(desc.getMeta().getStoreType()));
  }
Example #6
  public void executeDistributedQuery(
      QueryContext queryContext,
      Session session,
      LogicalPlan plan,
      String sql,
      String jsonExpr,
      SubmitQueryResponse.Builder responseBuilder)
      throws Exception {
    LogicalRootNode rootNode = plan.getRootBlock().getRoot();

    TableDesc tableDesc = PlannerUtil.getTableDesc(catalog, plan.getRootBlock().getRoot());
    if (tableDesc != null) {

      Tablespace space = TablespaceManager.get(tableDesc.getUri()).get();
      FormatProperty formatProperty = space.getFormatProperty(tableDesc.getMeta());

      if (!formatProperty.isInsertable()) {
        throw new UnsupportedException(
            String.format("INSERT operation on %s tablespace", tableDesc.getUri().toString()));
      }

      space.prepareTable(rootNode.getChild());
    }

    hookManager.doHooks(queryContext, plan);

    QueryManager queryManager = this.context.getQueryJobManager();
    QueryInfo queryInfo =
        queryManager.scheduleQuery(session, queryContext, sql, jsonExpr, rootNode);

    responseBuilder.setState(OK);
    responseBuilder.setQueryId(queryInfo.getQueryId().getProto());
    responseBuilder.setResultType(ResultType.FETCH);
    if (queryInfo.getQueryMasterHost() != null) {
      responseBuilder.setQueryMasterHost(queryInfo.getQueryMasterHost());
    }
    responseBuilder.setQueryMasterPort(queryInfo.getQueryMasterClientPort());
    LOG.info(
        "Query "
            + queryInfo.getQueryId().toString()
            + ","
            + queryInfo.getSql()
            + ","
            + " is forwarded to "
            + queryInfo.getQueryMasterHost()
            + ":"
            + queryInfo.getQueryMasterPort());
  }
Example #7
  @Override
  public final void createTable(final CatalogProtos.TableDescProto tableDescProto)
      throws CatalogException {
    HiveCatalogStoreClientPool.HiveCatalogStoreClient client = null;

    TableDesc tableDesc = new TableDesc(tableDescProto);
    String[] splitted = CatalogUtil.splitFQTableName(tableDesc.getName());
    String databaseName = splitted[0];
    String tableName = splitted[1];

    try {
      client = clientPool.getClient();

      org.apache.hadoop.hive.metastore.api.Table table =
          new org.apache.hadoop.hive.metastore.api.Table();
      table.setDbName(databaseName);
      table.setTableName(tableName);
      table.setParameters(
          new HashMap<String, String>(tableDesc.getMeta().getOptions().getAllKeyValus()));
      // TODO: set owner
      // table.setOwner();

      StorageDescriptor sd = new StorageDescriptor();
      sd.setSerdeInfo(new SerDeInfo());
      sd.getSerdeInfo().setParameters(new HashMap<String, String>());
      sd.getSerdeInfo().setName(table.getTableName());

      // If Tajo sets the table location here, the thrift client throws:
      // Caused by: MetaException(message:java.lang.NullPointerException)
      // To modify the table path, you have to do it from the Hive CLI.
      if (tableDesc.isExternal()) {
        table.setTableType(TableType.EXTERNAL_TABLE.name());
        table.putToParameters("EXTERNAL", "TRUE");

        Path tablePath = new Path(tableDesc.getUri());
        FileSystem fs = tablePath.getFileSystem(conf);
        if (fs.isFile(tablePath)) {
          LOG.warn("A table path is a file, but HiveCatalogStore does not allow a file path.");
          sd.setLocation(tablePath.getParent().toString());
        } else {
          sd.setLocation(tablePath.toString());
        }
      }

      // set column information
      List<Column> columns = tableDesc.getSchema().getRootColumns();
      ArrayList<FieldSchema> cols = new ArrayList<FieldSchema>(columns.size());

      for (Column eachField : columns) {
        cols.add(
            new FieldSchema(
                eachField.getSimpleName(),
                HiveCatalogUtil.getHiveFieldType(eachField.getDataType()),
                ""));
      }
      sd.setCols(cols);

      // set partition keys
      if (tableDesc.hasPartition()
          && tableDesc.getPartitionMethod().getPartitionType().equals(PartitionType.COLUMN)) {
        List<FieldSchema> partitionKeys = new ArrayList<FieldSchema>();
        for (Column eachPartitionKey :
            tableDesc.getPartitionMethod().getExpressionSchema().getRootColumns()) {
          partitionKeys.add(
              new FieldSchema(
                  eachPartitionKey.getSimpleName(),
                  HiveCatalogUtil.getHiveFieldType(eachPartitionKey.getDataType()),
                  ""));
        }
        table.setPartitionKeys(partitionKeys);
      }

      if (tableDesc.getMeta().getStoreType().equalsIgnoreCase(BuiltinStorages.RCFILE)) {
        String serde = tableDesc.getMeta().getOption(StorageConstants.RCFILE_SERDE);
        sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName());
        sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName());
        if (StorageConstants.DEFAULT_TEXT_SERDE.equals(serde)) {
          sd.getSerdeInfo()
              .setSerializationLib(
                  org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName());
        } else {
          sd.getSerdeInfo()
              .setSerializationLib(
                  org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe.class.getName());
        }

        if (tableDesc.getMeta().getOptions().containsKey(StorageConstants.RCFILE_NULL)) {
          table.putToParameters(
              serdeConstants.SERIALIZATION_NULL_FORMAT,
              StringEscapeUtils.unescapeJava(
                  tableDesc.getMeta().getOption(StorageConstants.RCFILE_NULL)));
        }
      } else if (tableDesc.getMeta().getStoreType().equals(BuiltinStorages.TEXT)) {
        sd.getSerdeInfo()
            .setSerializationLib(
                org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
        sd.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class.getName());
        sd.setOutputFormat(
            org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat.class.getName());

        String fieldDelimiter =
            tableDesc
                .getMeta()
                .getOption(
                    StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);

        // A user can specify a unicode escape such as \u0001 or \001 as the field delimiter.
        // In that case the option value reaches this point in its escaped form (e.g. "\u0001"),
        // so it has to be un-escaped here for Hive to receive the actual delimiter character.
        sd.getSerdeInfo()
            .putToParameters(
                serdeConstants.SERIALIZATION_FORMAT,
                StringEscapeUtils.unescapeJava(fieldDelimiter));
        sd.getSerdeInfo()
            .putToParameters(
                serdeConstants.FIELD_DELIM, StringEscapeUtils.unescapeJava(fieldDelimiter));
        table.getParameters().remove(StorageConstants.TEXT_DELIMITER);

        if (tableDesc.getMeta().containsOption(StorageConstants.TEXT_NULL)) {
          table.putToParameters(
              serdeConstants.SERIALIZATION_NULL_FORMAT,
              StringEscapeUtils.unescapeJava(
                  tableDesc.getMeta().getOption(StorageConstants.TEXT_NULL)));
          table.getParameters().remove(StorageConstants.TEXT_NULL);
        }
      } else if (tableDesc
          .getMeta()
          .getStoreType()
          .equalsIgnoreCase(BuiltinStorages.SEQUENCE_FILE)) {
        String serde = tableDesc.getMeta().getOption(StorageConstants.SEQUENCEFILE_SERDE);
        sd.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName());
        sd.setOutputFormat(
            org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat.class.getName());

        if (StorageConstants.DEFAULT_TEXT_SERDE.equals(serde)) {
          sd.getSerdeInfo()
              .setSerializationLib(
                  org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());

          String fieldDelimiter =
              tableDesc
                  .getMeta()
                  .getOption(
                      StorageConstants.SEQUENCEFILE_DELIMITER,
                      StorageConstants.DEFAULT_FIELD_DELIMITER);

          // A user can specify a unicode escape such as \u0001 or \001 as the field delimiter.
          // In that case the option value reaches this point in its escaped form (e.g. "\u0001"),
          // so it has to be un-escaped here for Hive to receive the actual delimiter character.
          sd.getSerdeInfo()
              .putToParameters(
                  serdeConstants.SERIALIZATION_FORMAT,
                  StringEscapeUtils.unescapeJava(fieldDelimiter));
          sd.getSerdeInfo()
              .putToParameters(
                  serdeConstants.FIELD_DELIM, StringEscapeUtils.unescapeJava(fieldDelimiter));
          table.getParameters().remove(StorageConstants.SEQUENCEFILE_DELIMITER);
        } else {
          sd.getSerdeInfo()
              .setSerializationLib(
                  org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class.getName());
        }

        if (tableDesc.getMeta().containsOption(StorageConstants.SEQUENCEFILE_NULL)) {
          table.putToParameters(
              serdeConstants.SERIALIZATION_NULL_FORMAT,
              StringEscapeUtils.unescapeJava(
                  tableDesc.getMeta().getOption(StorageConstants.SEQUENCEFILE_NULL)));
          table.getParameters().remove(StorageConstants.SEQUENCEFILE_NULL);
        }
      } else {
        if (tableDesc.getMeta().getStoreType().equalsIgnoreCase(BuiltinStorages.PARQUET)) {
          sd.setInputFormat(parquet.hive.DeprecatedParquetInputFormat.class.getName());
          sd.setOutputFormat(parquet.hive.DeprecatedParquetOutputFormat.class.getName());
          sd.getSerdeInfo()
              .setSerializationLib(parquet.hive.serde.ParquetHiveSerDe.class.getName());
        } else {
          throw new UnsupportedException(
              tableDesc.getMeta().getStoreType() + " in HiveCatalogStore");
        }
      }

      sd.setSortCols(new ArrayList<Order>());

      table.setSd(sd);
      client.getHiveClient().createTable(table);
    } catch (Throwable t) {
      throw new TajoInternalError(t);
    } finally {
      if (client != null) client.release();
    }
  }
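  // The delimiter handling in the TEXT and SEQUENCE_FILE branches above hinges on unescapeJava
  // converting the escaped option value back into the real control character before it is put
  // into the Hive SerDe parameters. A minimal sketch of just that step; the stored option value
  // is illustrative, and only the commons-lang StringEscapeUtils class is assumed.
  public class DelimiterUnescapeSketch {
    public static void main(String[] args) {
      String storedOption = "\\u0001";   // the delimiter as typically stored: escaped text
      String actual = org.apache.commons.lang.StringEscapeUtils.unescapeJava(storedOption);
      System.out.println((int) actual.charAt(0));   // prints 1: the real SOH control character
      // It is this unescaped value that the method above writes into the
      // serdeConstants.SERIALIZATION_FORMAT and serdeConstants.FIELD_DELIM parameters.
    }
  }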
Example #8
  @Test
  public final void testNext() throws IOException, PlanningException {
    FileFragment[] frags =
        StorageManager.splitNG(
            conf, "employee", employee.getMeta(), employee.getPath(), Integer.MAX_VALUE);
    Path workDir = new Path(testDir, TestExternalSortExec.class.getName());
    TaskAttemptContext ctx =
        new TaskAttemptContext(
            conf,
            LocalTajoTestingUtility.newQueryUnitAttemptId(),
            new FileFragment[] {frags[0]},
            workDir);
    ctx.setEnforcer(new Enforcer());
    Expr expr = analyzer.parse(QUERIES[0]);
    LogicalPlan plan = planner.createPlan(expr);
    LogicalNode rootNode = plan.getRootBlock().getRoot();

    PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, sm);
    PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode);

    ProjectionExec proj = (ProjectionExec) exec;

    // TODO - should be planned with the user's optimization hint
    if (!(proj.getChild() instanceof ExternalSortExec)) {
      UnaryPhysicalExec sortExec = proj.getChild();
      SeqScanExec scan = sortExec.getChild();

      ExternalSortExec extSort =
          new ExternalSortExec(ctx, sm, ((MemSortExec) sortExec).getPlan(), scan);
      proj.setChild(extSort);
    }

    Tuple tuple;
    Tuple preVal = null;
    Tuple curVal;
    int cnt = 0;
    exec.init();
    long start = System.currentTimeMillis();
    TupleComparator comparator =
        new TupleComparator(
            proj.getSchema(),
            new SortSpec[] {
              new SortSpec(new Column("managerId", Type.INT4)),
              new SortSpec(new Column("empId", Type.INT4))
            });

    while ((tuple = exec.next()) != null) {
      curVal = tuple;
      if (preVal != null) {
        assertTrue(
            "prev: " + preVal + ", but cur: " + curVal, comparator.compare(preVal, curVal) <= 0);
      }
      preVal = curVal;
      cnt++;
    }
    long end = System.currentTimeMillis();
    assertEquals(numTuple, cnt);

    // for rescan test
    preVal = null;
    exec.rescan();
    cnt = 0;
    while ((tuple = exec.next()) != null) {
      curVal = tuple;
      if (preVal != null) {
        assertTrue(
            "prev: " + preVal + ", but cur: " + curVal, comparator.compare(preVal, curVal) <= 0);
      }
      preVal = curVal;
      cnt++;
    }
    assertEquals(numTuple, cnt);
    exec.close();
    System.out.println("Sort Time: " + (end - start) + " msc");
  }