Beispiel #1
0
  /**
   * Builds a metastore {@code StorageDescriptor} from a table's columns and storage settings.
   *
   * <p>The serde is named after {@code tableName}; its serialization library and parameters, as
   * well as the descriptor's location and input/output formats, are read from {@code storage}. An
   * empty location is stored as {@code null}. Bucket count and bucket columns are only set when
   * {@code storage} carries a bucket property.
   *
   * @param tableName name assigned to the serde info
   * @param columns columns converted with {@code MetastoreUtil.toMetastoreApiFieldSchema}
   * @param storage source of location, format, serde and bucketing settings
   * @return the populated storage descriptor
   * @throws IllegalArgumentException if {@code storage} is sorted or skewed
   */
  private static StorageDescriptor makeStorageDescriptor(
      String tableName, List<Column> columns, Storage storage) {
    if (storage.isSorted() || storage.isSkewed()) {
      throw new IllegalArgumentException(
          "Writing to sorted and/or skewed table/partition is not supported");
    }

    StorageDescriptor descriptor = new StorageDescriptor();
    descriptor.setParameters(ImmutableMap.of());
    descriptor.setLocation(emptyToNull(storage.getLocation()));
    descriptor.setInputFormat(storage.getStorageFormat().getInputFormatNullable());
    descriptor.setOutputFormat(storage.getStorageFormat().getOutputFormatNullable());
    descriptor.setCols(
        columns.stream().map(MetastoreUtil::toMetastoreApiFieldSchema).collect(toList()));

    SerDeInfo serde = new SerDeInfo();
    serde.setName(tableName);
    serde.setSerializationLib(storage.getStorageFormat().getSerDeNullable());
    serde.setParameters(storage.getSerdeParameters());
    descriptor.setSerdeInfo(serde);

    // Bucketing is optional; leave the bucket fields untouched when it is absent.
    storage
        .getBucketProperty()
        .ifPresent(
            bucketing -> {
              descriptor.setNumBuckets(bucketing.getBucketCount());
              descriptor.setBucketCols(bucketing.getBucketedBy());
            });

    return descriptor;
  }
  /**
   * Creates a table named {@code tableName} in {@code DATABASE} through the metastore client and
   * then applies {@code tablePerm} to the table's path under the warehouse directory.
   *
   * <p>The table uses the RCFile input/output formats with {@code ColumnarSerDe}; its columns come
   * from {@code ColumnHolder.colMapping} and its partition keys from
   * {@code ColumnHolder.partitionCols}.
   *
   * @param tableName name of the table to create (also the key into the column mapping)
   * @param tablePerm permission string handed to {@code FsPermission}
   * @throws Exception if the metastore call or the filesystem operation fails
   */
  private static void createTable(String tableName, String tablePerm) throws Exception {
    // Serde: columnar serde with serialization format "1".
    HashMap<String, String> serdeParams = new HashMap<String, String>();
    serdeParams.put(serdeConstants.SERIALIZATION_FORMAT, "1");

    SerDeInfo serde = new SerDeInfo();
    serde.setName(tableName);
    serde.setParameters(serdeParams);
    serde.setSerializationLib(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName());

    // Storage: RCFile in, RCFile out.
    StorageDescriptor descriptor = new StorageDescriptor();
    descriptor.setCols(ColumnHolder.colMapping.get(tableName));
    descriptor.setParameters(new HashMap<String, String>());
    descriptor.setSerdeInfo(serde);
    descriptor.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName());
    descriptor.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName());

    Table tbl = new Table();
    tbl.setDbName(DATABASE);
    tbl.setTableName(tableName);
    tbl.setSd(descriptor);
    tbl.setPartitionKeys(ColumnHolder.partitionCols);

    // Register the table first; the permission change is applied only afterwards.
    hmsc.createTable(tbl);
    FileSystem.get(mrConf)
        .setPermission(new Path(warehousedir, tableName), new FsPermission(tablePerm));
  }
  /**
   * Commits a table creation: moves the written data into its final location when a temporary
   * path was used, then registers the table in the metastore.
   *
   * <p>The table is created as a managed, unpartitioned table owned by the handle's table owner.
   * A column named {@code SAMPLE_WEIGHT_COLUMN_NAME} gets a dedicated column comment and switches
   * the table comment to the "sampled table" warning.
   *
   * @param tableHandle must be a {@code HiveOutputTableHandle}
   * @param fragments not used by this method
   * @throws PrestoException with {@code HIVE_PATH_ALREADY_EXISTS} if a temporary path is in use
   *     and the target path already exists
   */
  @Override
  public void commitCreateTable(
      ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments) {
    HiveOutputTableHandle handle =
        checkType(tableHandle, HiveOutputTableHandle.class, "tableHandle");
    Path targetPath = new Path(handle.getTargetPath());

    if (handle.hasTemporaryPath()) {
      // verify no one raced us to create the target directory
      if (pathExists(targetPath)) {
        throw new PrestoException(
            HIVE_PATH_ALREADY_EXISTS,
            format(
                "Unable to commit creation of table '%s': target directory already exists: %s",
                new SchemaTableName(handle.getSchemaName(), handle.getTableName()), targetPath));
      }
      // move the temporary directory into its final place
      rename(new Path(handle.getTemporaryPath()), targetPath);
    }

    // Hive type names, positionally aligned with the handle's column names
    List<String> hiveTypeNames =
        handle
            .getColumnTypes()
            .stream()
            .map(HiveType::toHiveType)
            .map(HiveType::getHiveTypeName)
            .collect(toList());

    List<String> columnNames = handle.getColumnNames();
    ImmutableList.Builder<FieldSchema> fieldSchemas = ImmutableList.builder();
    boolean sampled = false;
    int position = 0;
    for (String columnName : columnNames) {
      String hiveTypeName = hiveTypeNames.get(position++);
      boolean isSampleWeight = columnName.equals(SAMPLE_WEIGHT_COLUMN_NAME);
      String columnComment = isSampleWeight ? "Presto sample weight column" : null;
      fieldSchemas.add(new FieldSchema(columnName, hiveTypeName, columnComment));
      sampled |= isSampleWeight;
    }

    HiveStorageFormat storageFormat = handle.getHiveStorageFormat();

    SerDeInfo serde = new SerDeInfo();
    serde.setName(handle.getTableName());
    serde.setSerializationLib(storageFormat.getSerDe());
    serde.setParameters(ImmutableMap.<String, String>of());

    StorageDescriptor descriptor = new StorageDescriptor();
    descriptor.setLocation(targetPath.toString());
    descriptor.setCols(fieldSchemas.build());
    descriptor.setSerdeInfo(serde);
    descriptor.setInputFormat(storageFormat.getInputFormat());
    descriptor.setOutputFormat(storageFormat.getOutputFormat());
    descriptor.setParameters(ImmutableMap.<String, String>of());

    String tableComment =
        sampled
            ? "Sampled table created by Presto. Only query this table from Hive if you understand how Presto implements sampling."
            : "Created by Presto";

    // create the table in the metastore
    Table newTable = new Table();
    newTable.setDbName(handle.getSchemaName());
    newTable.setTableName(handle.getTableName());
    newTable.setOwner(handle.getTableOwner());
    newTable.setTableType(TableType.MANAGED_TABLE.toString());
    newTable.setParameters(ImmutableMap.of("comment", tableComment));
    newTable.setPartitionKeys(ImmutableList.<FieldSchema>of());
    newTable.setSd(descriptor);

    metastore.createTable(newTable);
  }
Beispiel #4
0
  /**
   * Generic setter: assigns {@code value} to the field identified by {@code field}, or unsets
   * that field when {@code value} is {@code null}.
   *
   * <p>NOTE(review): this looks like Thrift-generated code; confirm hand edits will not be
   * overwritten by regeneration.
   *
   * @param field which field to modify
   * @param value new value, cast without checking to the field's Java type; {@code null} unsets
   */
  public void setFieldValue(_Fields field, Object value) {
    // A null value never assigns anything: it clears the selected field.
    if (value == null) {
      switch (field) {
        case COLS:
          unsetCols();
          break;
        case LOCATION:
          unsetLocation();
          break;
        case INPUT_FORMAT:
          unsetInputFormat();
          break;
        case OUTPUT_FORMAT:
          unsetOutputFormat();
          break;
        case COMPRESSED:
          unsetCompressed();
          break;
        case NUM_BUCKETS:
          unsetNumBuckets();
          break;
        case SERDE_INFO:
          unsetSerdeInfo();
          break;
        case BUCKET_COLS:
          unsetBucketCols();
          break;
        case SORT_COLS:
          unsetSortCols();
          break;
        case PARAMETERS:
          unsetParameters();
          break;
      }
      return;
    }

    // Non-null value: cast to the field's type and delegate to the typed setter.
    switch (field) {
      case COLS:
        setCols((List<FieldSchema>) value);
        break;
      case LOCATION:
        setLocation((String) value);
        break;
      case INPUT_FORMAT:
        setInputFormat((String) value);
        break;
      case OUTPUT_FORMAT:
        setOutputFormat((String) value);
        break;
      case COMPRESSED:
        setCompressed((Boolean) value);
        break;
      case NUM_BUCKETS:
        setNumBuckets((Integer) value);
        break;
      case SERDE_INFO:
        setSerdeInfo((SerDeInfo) value);
        break;
      case BUCKET_COLS:
        setBucketCols((List<String>) value);
        break;
      case SORT_COLS:
        setSortCols((List<Order>) value);
        break;
      case PARAMETERS:
        setParameters((Map<String, String>) value);
        break;
    }
  }
  /**
   * Verifies Impala is able to properly parse delimiters in supported formats. See
   * HdfsStorageDescriptor.parseDelim() for details.
   *
   * <p>Every case replaces the serde parameters with a single delimiter entry and then builds an
   * {@code HdfsStorageDescriptor} from the storage descriptor. Accepted delimiters must yield a
   * non-null descriptor; rejected ones must raise {@code InvalidStorageDescriptorException} with
   * the exact message checked below.
   */
  @Test
  public void testDelimiters() throws InvalidStorageDescriptorException {
    StorageDescriptor sd =
        HiveStorageDescriptorFactory.createSd(THdfsFileFormat.TEXT, RowFormat.DEFAULT_ROW_FORMAT);
    sd.setParameters(new HashMap<String, String>());

    // {serde parameter key, delimiter value} pairs that must be accepted.
    String[][] acceptedDelims = {
      {serdeConstants.FIELD_DELIM, "-2"},
      {serdeConstants.FIELD_DELIM, "-128"},
      {serdeConstants.FIELD_DELIM, "127"},
      {serdeConstants.LINE_DELIM, "\001"},
      {serdeConstants.FIELD_DELIM, "|"},
      {serdeConstants.FIELD_DELIM, "\t"},
    };
    for (String[] delim : acceptedDelims) {
      // Start from empty serde parameters so only the delimiter under test is present.
      sd.getSerdeInfo().setParameters(new HashMap<String, String>());
      sd.getSerdeInfo().putToParameters(delim[0], delim[1]);
      assertNotNull(HdfsStorageDescriptor.fromStorageDescriptor("fakeTbl", sd));
    }

    // {serde parameter key, delimiter value} pairs that must be rejected.
    String[][] rejectedDelims = {
      {serdeConstants.FIELD_DELIM, "ab"},
      {serdeConstants.FIELD_DELIM, "128"},
      // "\128" is the octal escape \12 (line feed) followed by '8', i.e. a two-character string.
      {serdeConstants.FIELD_DELIM, "\128"},
      {serdeConstants.LINE_DELIM, "-129"},
    };
    for (String[] delim : rejectedDelims) {
      sd.getSerdeInfo().setParameters(new HashMap<String, String>());
      sd.getSerdeInfo().putToParameters(delim[0], delim[1]);
      try {
        HdfsStorageDescriptor.fromStorageDescriptor("fake", sd);
        fail("Expected InvalidStorageDescriptorException for delimiter '" + delim[1] + "'");
      } catch (HdfsStorageDescriptor.InvalidStorageDescriptorException e) {
        assertEquals(
            "Invalid delimiter: '"
                + delim[1]
                + "'. Delimiter must be specified as a "
                + "single character or as a decimal value in the range [-128:127]",
            e.getMessage());
      }
    }
  }
  /**
   * Creates an external, bucketed, partitioned ORC table through a {@code HiveMetaStoreClient}
   * built from the local metastore's configuration, reads the table back, and checks that its
   * string form contains the configured table name.
   *
   * <p>Any metastore/Thrift failure fails the test instead of being printed and ignored, and the
   * client is closed when the test finishes.
   */
  @Test
  public void testHiveLocalMetaStore() {
    String dbName = propertyParser.getProperty(ConfigVars.HIVE_TEST_DATABASE_NAME_KEY);
    String tableName = propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY);

    HiveMetaStoreClient hiveClient = null;
    try {
      hiveClient = new HiveMetaStoreClient(hiveLocalMetaStore.getHiveConf());

      // Start from a clean slate; the flags are passed through unchanged from the original call.
      hiveClient.dropTable(dbName, tableName, true, true);

      // Define the cols
      List<FieldSchema> cols = new ArrayList<>();
      cols.add(new FieldSchema("id", serdeConstants.INT_TYPE_NAME, ""));
      cols.add(new FieldSchema("msg", serdeConstants.STRING_TYPE_NAME, ""));

      // Values for the StorageDescriptor
      String location = new File(tableName).getAbsolutePath();
      String inputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat";
      String outputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat";
      int numBuckets = 16;
      Map<String, String> orcProps = new HashMap<>();
      orcProps.put("orc.compress", "NONE");
      SerDeInfo serDeInfo =
          new SerDeInfo(OrcSerde.class.getSimpleName(), OrcSerde.class.getName(), orcProps);
      List<String> bucketCols = new ArrayList<>();
      bucketCols.add("id");

      // Build the StorageDescriptor
      StorageDescriptor sd = new StorageDescriptor();
      sd.setCols(cols);
      sd.setLocation(location);
      sd.setInputFormat(inputFormat);
      sd.setOutputFormat(outputFormat);
      sd.setNumBuckets(numBuckets);
      sd.setSerdeInfo(serDeInfo);
      sd.setBucketCols(bucketCols);
      sd.setSortCols(new ArrayList<Order>());
      sd.setParameters(new HashMap<String, String>());

      // Define the table
      Table tbl = new Table();
      tbl.setDbName(dbName);
      tbl.setTableName(tableName);
      tbl.setSd(sd);
      tbl.setOwner(System.getProperty("user.name"));
      tbl.setParameters(new HashMap<String, String>());
      tbl.setViewOriginalText("");
      tbl.setViewExpandedText("");
      tbl.setTableType(TableType.EXTERNAL_TABLE.name());
      List<FieldSchema> partitions = new ArrayList<>();
      partitions.add(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, ""));
      tbl.setPartitionKeys(partitions);

      // Create the table
      hiveClient.createTable(tbl);

      // Describe the table
      String description = hiveClient.getTable(dbName, tableName).toString();
      LOG.info("HIVE: Created Table: {}", description);
      assertThat(description, containsString(tableName));

    } catch (TException e) {
      // MetaException is a TException, so this single handler covers both. Rethrow so a broken
      // metastore fails the test rather than letting it pass silently.
      throw new AssertionError("Hive metastore operation failed", e);
    } finally {
      if (hiveClient != null) {
        hiveClient.close();
      }
    }
  }