Example #1
  public static Partition fromMetastoreApiPartition(
      org.apache.hadoop.hive.metastore.api.Partition partition) {
    StorageDescriptor storageDescriptor = partition.getSd();
    if (storageDescriptor == null) {
      throw new PrestoException(
          HIVE_INVALID_METADATA, "Partition does not contain a storage descriptor: " + partition);
    }

    Partition.Builder partitionBuilder =
        Partition.builder()
            .setDatabaseName(partition.getDbName())
            .setTableName(partition.getTableName())
            .setValues(partition.getValues())
            .setColumns(
                storageDescriptor
                    .getCols()
                    .stream()
                    .map(MetastoreUtil::fromMetastoreApiFieldSchema)
                    .collect(toList()))
            .setParameters(partition.getParameters());

    fromMetastoreApiStorageDescriptor(
        storageDescriptor,
        partitionBuilder.getStorageBuilder(),
        format("%s.%s", partition.getTableName(), partition.getValues()));

    return partitionBuilder.build();
  }
Example #2
  private void addPartition(
      String databaseName, String tableName, CatalogProtos.PartitionDescProto partitionDescProto) {
    HiveCatalogStoreClientPool.HiveCatalogStoreClient client = null;
    try {

      client = clientPool.getClient();

      Partition partition = new Partition();
      partition.setDbName(databaseName);
      partition.setTableName(tableName);

      List<String> values = Lists.newArrayList();
      for (CatalogProtos.PartitionKeyProto keyProto : partitionDescProto.getPartitionKeysList()) {
        values.add(keyProto.getPartitionValue());
      }
      partition.setValues(values);

      Table table = client.getHiveClient().getTable(databaseName, tableName);
      StorageDescriptor sd = table.getSd();
      sd.setLocation(partitionDescProto.getPath());
      partition.setSd(sd);

      client.getHiveClient().add_partition(partition);
    } catch (Exception e) {
      throw new TajoInternalError(e);
    } finally {
      if (client != null) {
        client.release();
      }
    }
  }
Example #3
  public static Table fromMetastoreApiTable(org.apache.hadoop.hive.metastore.api.Table table) {
    StorageDescriptor storageDescriptor = table.getSd();
    if (storageDescriptor == null) {
      throw new PrestoException(HIVE_INVALID_METADATA, "Table is missing storage descriptor");
    }

    Table.Builder tableBuilder =
        Table.builder()
            .setDatabaseName(table.getDbName())
            .setTableName(table.getTableName())
            .setOwner(nullToEmpty(table.getOwner()))
            .setTableType(table.getTableType())
            .setDataColumns(
                storageDescriptor
                    .getCols()
                    .stream()
                    .map(MetastoreUtil::fromMetastoreApiFieldSchema)
                    .collect(toList()))
            .setPartitionColumns(
                table
                    .getPartitionKeys()
                    .stream()
                    .map(MetastoreUtil::fromMetastoreApiFieldSchema)
                    .collect(toList()))
            .setParameters(
                table.getParameters() == null ? ImmutableMap.of() : table.getParameters())
            .setViewOriginalText(Optional.ofNullable(emptyToNull(table.getViewOriginalText())))
            .setViewExpandedText(Optional.ofNullable(emptyToNull(table.getViewExpandedText())));

    fromMetastoreApiStorageDescriptor(
        storageDescriptor, tableBuilder.getStorageBuilder(), table.getTableName());

    return tableBuilder.build();
  }
Example #4
  private static void fromMetastoreApiStorageDescriptor(
      StorageDescriptor storageDescriptor, Storage.Builder builder, String tablePartitionName) {
    SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
    if (serdeInfo == null) {
      throw new PrestoException(
          HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }

    builder
        .setStorageFormat(
            StorageFormat.createNullable(
                serdeInfo.getSerializationLib(),
                storageDescriptor.getInputFormat(),
                storageDescriptor.getOutputFormat()))
        .setLocation(nullToEmpty(storageDescriptor.getLocation()))
        .setBucketProperty(
            HiveBucketProperty.fromStorageDescriptor(storageDescriptor, tablePartitionName))
        .setSorted(storageDescriptor.isSetSortCols() && !storageDescriptor.getSortCols().isEmpty())
        .setSkewed(
            storageDescriptor.isSetSkewedInfo()
                && storageDescriptor.getSkewedInfo().isSetSkewedColNames()
                && !storageDescriptor.getSkewedInfo().getSkewedColNames().isEmpty())
        .setSerdeParameters(
            serdeInfo.getParameters() == null ? ImmutableMap.of() : serdeInfo.getParameters());
  }
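The sorted and skewed flags above are derived entirely from optional Thrift fields on the StorageDescriptor. A small illustrative check against a hand-built descriptor, assuming the usual org.apache.hadoop.hive.metastore.api and Guava imports (the column name "id" is made up for this sketch):

  StorageDescriptor sd = new StorageDescriptor();
  sd.setSortCols(ImmutableList.of(new Order("id", 1))); // 1 = ascending
  boolean sorted = sd.isSetSortCols() && !sd.getSortCols().isEmpty(); // true
  boolean skewed = sd.isSetSkewedInfo()
      && sd.getSkewedInfo().isSetSkewedColNames()
      && !sd.getSkewedInfo().getSkewedColNames().isEmpty(); // false: skewedInfo was never set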
Example #5
  private static StorageDescriptor makeStorageDescriptor(
      String tableName, List<Column> columns, Storage storage) {
    if (storage.isSorted() || storage.isSkewed()) {
      throw new IllegalArgumentException(
          "Writing to sorted and/or skewed table/partition is not supported");
    }
    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(tableName);
    serdeInfo.setSerializationLib(storage.getStorageFormat().getSerDeNullable());
    serdeInfo.setParameters(storage.getSerdeParameters());

    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation(emptyToNull(storage.getLocation()));
    sd.setCols(columns.stream().map(MetastoreUtil::toMetastoreApiFieldSchema).collect(toList()));
    sd.setSerdeInfo(serdeInfo);
    sd.setInputFormat(storage.getStorageFormat().getInputFormatNullable());
    sd.setOutputFormat(storage.getStorageFormat().getOutputFormatNullable());
    sd.setParameters(ImmutableMap.of());

    Optional<HiveBucketProperty> bucketProperty = storage.getBucketProperty();
    if (bucketProperty.isPresent()) {
      sd.setNumBuckets(bucketProperty.get().getBucketCount());
      sd.setBucketCols(bucketProperty.get().getBucketedBy());
    }

    return sd;
  }
Example #6
  @Before
  public void createTable() throws Exception {
    // Use JUnit's Assume to skip running this fixture against any storage formats whose
    // SerDe is in the disabled serdes list.
    Assume.assumeTrue(!DISABLED_SERDES.contains(serdeClass));

    String databaseName = (dbName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName;
    try {
      client.dropTable(databaseName, tableName);
    } catch (Exception e) {
      // Can fail with NoSuchObjectException.
    }

    Table tbl = new Table();
    tbl.setDbName(databaseName);
    tbl.setTableName(tableName);
    if (isTableExternal()) {
      tbl.setTableType(TableType.EXTERNAL_TABLE.toString());
    } else {
      tbl.setTableType(TableType.MANAGED_TABLE.toString());
    }
    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(getTableColumns());

    tbl.setPartitionKeys(getPartitionKeys());
    tbl.setSd(sd);

    sd.setBucketCols(new ArrayList<String>(2));
    sd.setSerdeInfo(new SerDeInfo());
    sd.getSerdeInfo().setName(tbl.getTableName());
    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
    if (isTableExternal()) {
      sd.getSerdeInfo().getParameters().put("EXTERNAL", "TRUE");
    }
    sd.getSerdeInfo().setSerializationLib(serdeClass);
    sd.setInputFormat(inputFormatClass);
    sd.setOutputFormat(outputFormatClass);

    Map<String, String> tableParams = new HashMap<String, String>();
    if (isTableExternal()) {
      tableParams.put("EXTERNAL", "TRUE");
    }
    if (isTableImmutable()) {
      tableParams.put(hive_metastoreConstants.IS_IMMUTABLE, "true");
    }
    StatsSetupConst.setBasicStatsState(tableParams, StatsSetupConst.TRUE);
    tbl.setParameters(tableParams);

    client.createTable(tbl);
  }
Example #7
  /** Performs a deep copy on <i>other</i>. */
  public StorageDescriptor(StorageDescriptor other) {
    __isset_bit_vector.clear();
    __isset_bit_vector.or(other.__isset_bit_vector);
    if (other.isSetCols()) {
      List<FieldSchema> __this__cols = new ArrayList<FieldSchema>();
      for (FieldSchema other_element : other.cols) {
        __this__cols.add(new FieldSchema(other_element));
      }
      this.cols = __this__cols;
    }
    if (other.isSetLocation()) {
      this.location = other.location;
    }
    if (other.isSetInputFormat()) {
      this.inputFormat = other.inputFormat;
    }
    if (other.isSetOutputFormat()) {
      this.outputFormat = other.outputFormat;
    }
    this.compressed = other.compressed;
    this.numBuckets = other.numBuckets;
    if (other.isSetSerdeInfo()) {
      this.serdeInfo = new SerDeInfo(other.serdeInfo);
    }
    if (other.isSetBucketCols()) {
      List<String> __this__bucketCols = new ArrayList<String>();
      for (String other_element : other.bucketCols) {
        __this__bucketCols.add(other_element);
      }
      this.bucketCols = __this__bucketCols;
    }
    if (other.isSetSortCols()) {
      List<Order> __this__sortCols = new ArrayList<Order>();
      for (Order other_element : other.sortCols) {
        __this__sortCols.add(new Order(other_element));
      }
      this.sortCols = __this__sortCols;
    }
    if (other.isSetParameters()) {
      Map<String, String> __this__parameters = new HashMap<String, String>();
      for (Map.Entry<String, String> other_element : other.parameters.entrySet()) {

        String other_element_key = other_element.getKey();
        String other_element_value = other_element.getValue();

        String __this__parameters_copy_key = other_element_key;

        String __this__parameters_copy_value = other_element_value;

        __this__parameters.put(__this__parameters_copy_key, __this__parameters_copy_value);
      }
      this.parameters = __this__parameters;
    }
  }
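This copy constructor is useful when a caller needs a partition-specific variant of a table's descriptor. Example #2 above mutates the descriptor returned by getTable() in place; copying first avoids that. A minimal sketch, with the helper name chosen purely for illustration:

  // Sketch: derive a partition-level StorageDescriptor without touching the table's own copy.
  static StorageDescriptor partitionStorageDescriptor(Table table, String partitionLocation) {
    StorageDescriptor sd = new StorageDescriptor(table.getSd()); // deep copy via the constructor above
    sd.setLocation(partitionLocation); // only the location differs per partition
    return sd;
  }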
Example #8
 /**
  * Iterates over the indexes on the table and populates the indexToKeys map for all
  * the indexes that satisfy the rewrite criteria.
  *
  * @param indexTables indexes defined on the table
  * @return a map from each usable index to the set of its index key column names
  * @throws SemanticException if an index table cannot be located
  */
 Map<Index, Set<String>> getIndexToKeysMap(List<Index> indexTables) throws SemanticException {
   Index index = null;
   Hive hiveInstance = hiveDb;
   Map<Index, Set<String>> indexToKeysMap = new LinkedHashMap<Index, Set<String>>();
   for (int idxCtr = 0; idxCtr < indexTables.size(); idxCtr++) {
     final Set<String> indexKeyNames = new LinkedHashSet<String>();
     index = indexTables.get(idxCtr);
     // Getting index key columns
     StorageDescriptor sd = index.getSd();
     List<FieldSchema> idxColList = sd.getCols();
     for (FieldSchema fieldSchema : idxColList) {
       indexKeyNames.add(fieldSchema.getName());
     }
     assert indexKeyNames.size() == 1;
     // Check that the index schema is as expected. This code block should
     // catch problems of this rewrite breaking when the AggregateIndexHandler
     // index is changed.
     List<String> idxTblColNames = new ArrayList<String>();
     try {
       Table idxTbl = hiveInstance.getTable(index.getDbName(), index.getIndexTableName());
       for (FieldSchema idxTblCol : idxTbl.getCols()) {
         idxTblColNames.add(idxTblCol.getName());
       }
     } catch (HiveException e) {
       LOG.error(
           "Got exception while locating index table, "
               + "skipping "
               + getName()
               + " optimization");
       LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
       throw new SemanticException(e.getMessage(), e);
     }
     assert (idxTblColNames.contains(IDX_BUCKET_COL));
     assert (idxTblColNames.contains(IDX_OFFSETS_ARRAY_COL));
     // We add all index tables that can be used for the rewrite and defer the decision
     // of which particular index to use, so that a better selection mechanism can be
     // plugged in later.
     indexToKeysMap.put(index, indexKeyNames);
   }
   return indexToKeysMap;
 }
Example #9
  static StorerInfo extractStorerInfo(StorageDescriptor sd, Map<String, String> properties)
      throws IOException {
    Properties hcatProperties = new Properties();
    for (String key : properties.keySet()) {
      hcatProperties.put(key, properties.get(key));
    }

    // also populate with StorageDescriptor->SerDe.Parameters
    for (Map.Entry<String, String> param : sd.getSerdeInfo().getParameters().entrySet()) {
      hcatProperties.put(param.getKey(), param.getValue());
    }

    return new StorerInfo(
        sd.getInputFormat(),
        sd.getOutputFormat(),
        sd.getSerdeInfo().getSerializationLib(),
        properties.get(
            org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE),
        hcatProperties);
  }
Example #10
  @Override
  public void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata) {
    checkArgument(!isNullOrEmpty(tableMetadata.getOwner()), "Table owner is null or empty");

    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();

    ImmutableList.Builder<String> columnNames = ImmutableList.builder();
    ImmutableList.Builder<Type> columnTypes = ImmutableList.builder();

    buildColumnInfo(tableMetadata, columnNames, columnTypes);

    ImmutableList.Builder<FieldSchema> partitionKeys = ImmutableList.builder();
    ImmutableList.Builder<FieldSchema> columns = ImmutableList.builder();

    List<String> names = columnNames.build();
    List<String> typeNames =
        columnTypes
            .build()
            .stream()
            .map(HiveType::toHiveType)
            .map(HiveType::getHiveTypeName)
            .collect(toList());

    for (int i = 0; i < names.size(); i++) {
      if (tableMetadata.getColumns().get(i).isPartitionKey()) {
        partitionKeys.add(new FieldSchema(names.get(i), typeNames.get(i), null));
      } else {
        columns.add(new FieldSchema(names.get(i), typeNames.get(i), null));
      }
    }

    Path targetPath = getTargetPath(schemaName, tableName, schemaTableName);

    HiveStorageFormat hiveStorageFormat = getHiveStorageFormat(session, this.hiveStorageFormat);
    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(tableName);
    serdeInfo.setSerializationLib(hiveStorageFormat.getSerDe());

    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation(targetPath.toString());

    sd.setCols(columns.build());
    sd.setSerdeInfo(serdeInfo);
    sd.setInputFormat(hiveStorageFormat.getInputFormat());
    sd.setOutputFormat(hiveStorageFormat.getOutputFormat());

    Table table = new Table();
    table.setDbName(schemaName);
    table.setTableName(tableName);
    table.setOwner(tableMetadata.getOwner());
    table.setTableType(TableType.MANAGED_TABLE.toString());
    String tableComment = "Created by Presto";
    table.setParameters(ImmutableMap.of("comment", tableComment));
    table.setPartitionKeys(partitionKeys.build());
    table.setSd(sd);

    metastore.createTable(table);
  }
Example #11
  @Override
  public void createView(
      ConnectorSession session, SchemaTableName viewName, String viewData, boolean replace) {
    if (replace) {
      try {
        dropView(session, viewName);
      } catch (ViewNotFoundException ignored) {
      }
    }

    Map<String, String> properties =
        ImmutableMap.<String, String>builder()
            .put("comment", "Presto View")
            .put(PRESTO_VIEW_FLAG, "true")
            .build();

    FieldSchema dummyColumn = new FieldSchema("dummy", STRING_TYPE_NAME, null);

    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(ImmutableList.of(dummyColumn));
    sd.setSerdeInfo(new SerDeInfo());

    Table table = new Table();
    table.setDbName(viewName.getSchemaName());
    table.setTableName(viewName.getTableName());
    table.setOwner(session.getUser());
    table.setTableType(TableType.VIRTUAL_VIEW.name());
    table.setParameters(properties);
    table.setViewOriginalText(encodeViewData(viewData));
    table.setViewExpandedText("/* Presto View */");
    table.setSd(sd);

    try {
      metastore.createTable(table);
    } catch (TableAlreadyExistsException e) {
      throw new ViewAlreadyExistsException(e.getTableName());
    }
  }
Example #12
  /**
   * Tests that Impala is able to create an HdfsStorageDescriptor using all combinations of Parquet
   * SerDe class name + input/output format class name.
   */
  @Test
  public void testParquetFileFormat()
      throws DatabaseNotFoundException, InvalidStorageDescriptorException {
    String[] parquetSerDe =
        new String[] {
          "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
          "parquet.hive.serde.ParquetHiveSerDe"
        };
    String[] inputFormats =
        new String[] {
          "com.cloudera.impala.hive.serde.ParquetInputFormat",
          "parquet.hive.DeprecatedParquetInputFormat",
          "parquet.hive.MapredParquetInputFormat",
          "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"
        };
    String[] outputFormats =
        new String[] {
          "com.cloudera.impala.hive.serde.ParquetOutputFormat",
          "parquet.hive.DeprecatedParquetOutputFormat",
          "parquet.hive.MapredParquetOutputFormat",
          "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"
        };

    for (String serDe : parquetSerDe) {
      SerDeInfo serDeInfo = new SerDeInfo();
      serDeInfo.setSerializationLib(serDe);
      serDeInfo.setParameters(new HashMap<String, String>());
      for (String inputFormat : inputFormats) {
        for (String outputFormat : outputFormats) {
          StorageDescriptor sd = new StorageDescriptor();
          sd.setSerdeInfo(serDeInfo);
          sd.setInputFormat(inputFormat);
          sd.setOutputFormat(outputFormat);
          assertNotNull(HdfsStorageDescriptor.fromStorageDescriptor("fakeTblName", sd));
        }
      }
    }
  }
Example #13
  private static void createTable(String tableName, String tablePerm) throws Exception {
    Table tbl = new Table();
    tbl.setDbName(DATABASE);
    tbl.setTableName(tableName);
    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(ColumnHolder.colMapping.get(tableName));
    tbl.setSd(sd);
    sd.setParameters(new HashMap<String, String>());
    sd.setSerdeInfo(new SerDeInfo());
    sd.getSerdeInfo().setName(tbl.getTableName());
    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName());
    sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName());
    sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
    sd.getSerdeInfo()
        .setSerializationLib(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName());
    tbl.setPartitionKeys(ColumnHolder.partitionCols);

    hmsc.createTable(tbl);
    FileSystem fs = FileSystem.get(mrConf);
    fs.setPermission(new Path(warehousedir, tableName), new FsPermission(tablePerm));
  }
Example #14
 /** Returns the RowFormat for the storage descriptor. */
 public static RowFormat fromStorageDescriptor(StorageDescriptor sd) {
   Preconditions.checkNotNull(sd);
   Map<String, String> params = sd.getSerdeInfo().getParameters();
   return new RowFormat(
       params.get("field.delim"), params.get("line.delim"), params.get("escape.delim"));
 }
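The method reads the standard serde parameter keys field.delim, line.delim, and escape.delim. A hypothetical usage sketch, assuming the RowFormat class above is on the classpath:

  SerDeInfo serdeInfo = new SerDeInfo();
  serdeInfo.setParameters(new HashMap<String, String>());
  serdeInfo.getParameters().put("field.delim", ",");
  serdeInfo.getParameters().put("line.delim", "\n");
  serdeInfo.getParameters().put("escape.delim", "\\");

  StorageDescriptor sd = new StorageDescriptor();
  sd.setSerdeInfo(serdeInfo);

  // fromStorageDescriptor only inspects sd.getSerdeInfo().getParameters()
  RowFormat rowFormat = RowFormat.fromStorageDescriptor(sd);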
Example #15
  /**
   * Adds a new HdfsPartition to internal partition list, populating with file format information
   * and file locations. If a partition contains no files, it's not added. For unchanged files
   * (indicated by unchanged mtime), reuses the FileDescriptor from the oldFileDescMap. Otherwise,
   * creates a new FileDescriptor for each modified or new file and adds it to newFileDescs. Returns
   * new partition or null, if none was added.
   *
   * @throws InvalidStorageDescriptorException if the supplied storage descriptor contains metadata
   *     that Impala can't understand.
   */
  private HdfsPartition addPartition(
      StorageDescriptor storageDescriptor,
      org.apache.hadoop.hive.metastore.api.Partition msPartition,
      List<LiteralExpr> partitionKeyExprs,
      Map<String, FileDescriptor> oldFileDescMap,
      List<FileDescriptor> newFileDescs)
      throws IOException, InvalidStorageDescriptorException {
    HdfsStorageDescriptor fileFormatDescriptor =
        HdfsStorageDescriptor.fromStorageDescriptor(this.name_, storageDescriptor);
    Path partDirPath = new Path(storageDescriptor.getLocation());
    List<FileDescriptor> fileDescriptors = Lists.newArrayList();
    if (DFS.exists(partDirPath)) {
      // DistributedFileSystem does not have an API that takes a timestamp and returns
      // the files that have been added or changed since then, so we call
      // DFS.listStatus() to list all the files.
      for (FileStatus fileStatus : DFS.listStatus(partDirPath)) {
        String fileName = fileStatus.getPath().getName().toString();
        if (fileStatus.isDirectory()
            || FileSystemUtil.isHiddenFile(fileName)
            || HdfsCompression.fromFileName(fileName) == HdfsCompression.LZO_INDEX) {
          // Ignore directories, hidden files starting with . or _, and LZO index files.
          // If a directory is erroneously created as a subdirectory of a partition dir
          // we should ignore it and move on. Hive will not recurse into directories.
          // Skip index files, these are read by the LZO scanner directly.
          continue;
        }

        String fullPath = fileStatus.getPath().toString();
        FileDescriptor fd = (oldFileDescMap != null) ? oldFileDescMap.get(fullPath) : null;
        if (fd != null
            && fd.getFileLength() == fileStatus.getLen()
            && fd.getModificationTime() == fileStatus.getModificationTime()) {
          // Reuse the old file descriptor along with its block metadata if the file
          // length and mtime have not changed.
        } else {
          // Create a new file descriptor. The block metadata will be populated by
          // loadFileDescriptorsBlockMd.
          fd = new FileDescriptor(fullPath, fileStatus.getLen(), fileStatus.getModificationTime());
          newFileDescs.add(fd);
        }
        fileDescriptors.add(fd);
        fileDescMap_.put(fullPath, fd);
      }

      HdfsPartition partition =
          new HdfsPartition(
              this,
              msPartition,
              partitionKeyExprs,
              fileFormatDescriptor,
              fileDescriptors,
              getAvailableAccessLevel(partDirPath));
      partitions_.add(partition);
      numHdfsFiles_ += fileDescriptors.size();
      totalHdfsBytes_ += partition.getSize();
      return partition;
    } else {
      LOG.warn("Path " + partDirPath + " does not exist for partition. Ignoring.");
      return null;
    }
  }
Example #16
  /**
   * Run a compactor job.
   *
   * @param conf Hive configuration file
   * @param jobName name to run this job with
   * @param t metastore table
   * @param sd metastore storage descriptor
   * @param txns list of valid transactions
   * @param isMajor is this a major compaction?
   * @throws java.io.IOException if the job fails
   */
  void run(
      HiveConf conf,
      String jobName,
      Table t,
      StorageDescriptor sd,
      ValidTxnList txns,
      boolean isMajor,
      Worker.StatsUpdater su)
      throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName(jobName);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setJarByClass(CompactorMR.class);
    LOG.debug("User jar set to " + job.getJar());
    job.setMapperClass(CompactorMap.class);
    job.setNumReduceTasks(0);
    job.setInputFormat(CompactorInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setOutputCommitter(CompactorOutputCommitter.class);

    String queueName = conf.getVar(HiveConf.ConfVars.COMPACTOR_JOB_QUEUE);
    if (queueName != null && queueName.length() > 0) {
      job.setQueueName(queueName);
    }

    job.set(FINAL_LOCATION, sd.getLocation());
    job.set(TMP_LOCATION, sd.getLocation() + "/" + TMPDIR + "_" + UUID.randomUUID().toString());
    job.set(INPUT_FORMAT_CLASS_NAME, sd.getInputFormat());
    job.set(OUTPUT_FORMAT_CLASS_NAME, sd.getOutputFormat());
    job.setBoolean(IS_MAJOR, isMajor);
    job.setBoolean(IS_COMPRESSED, sd.isCompressed());
    job.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString());
    job.setInt(NUM_BUCKETS, sd.getNumBuckets());
    job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString());
    setColumnTypes(job, sd.getCols());

    // Figure out and encode what files we need to read.  We do this here (rather than in
    // getSplits below) because as part of this we discover our minimum and maximum transactions,
    // and discovering that in getSplits is too late as we then have no way to pass it to our
    // mapper.

    AcidUtils.Directory dir = AcidUtils.getAcidState(new Path(sd.getLocation()), conf, txns, false);
    StringableList dirsToSearch = new StringableList();
    Path baseDir = null;
    if (isMajor) {
      // There may not be a base dir if the partition was empty before inserts or if this
      // partition is just now being converted to ACID.
      baseDir = dir.getBaseDirectory();
      if (baseDir == null) {
        List<HdfsFileStatusWithId> originalFiles = dir.getOriginalFiles();
        if (originalFiles != null && !originalFiles.isEmpty()) {
          // There are original format files
          for (HdfsFileStatusWithId stat : originalFiles) {
            Path path = stat.getFileStatus().getPath();
            dirsToSearch.add(path);
            LOG.debug("Adding original file " + path + " to dirs to search");
          }
          // Set base to the location so that the input format reads the original files.
          baseDir = new Path(sd.getLocation());
        }
      } else {
        // add our base to the list of directories to search for files in.
        LOG.debug("Adding base directory " + baseDir + " to dirs to search");
        dirsToSearch.add(baseDir);
      }
    }

    List<AcidUtils.ParsedDelta> parsedDeltas = dir.getCurrentDirectories();

    if (parsedDeltas == null || parsedDeltas.size() == 0) {
      // Seriously, no deltas?  Can't compact that.
      LOG.error("No delta files found to compact in " + sd.getLocation());
      return;
    }

    StringableList deltaDirs = new StringableList();
    long minTxn = Long.MAX_VALUE;
    long maxTxn = Long.MIN_VALUE;
    for (AcidUtils.ParsedDelta delta : parsedDeltas) {
      LOG.debug("Adding delta " + delta.getPath() + " to directories to search");
      dirsToSearch.add(delta.getPath());
      deltaDirs.add(delta.getPath());
      minTxn = Math.min(minTxn, delta.getMinTransaction());
      maxTxn = Math.max(maxTxn, delta.getMaxTransaction());
    }

    if (baseDir != null) job.set(BASE_DIR, baseDir.toString());
    job.set(DELTA_DIRS, deltaDirs.toString());
    job.set(DIRS_TO_SEARCH, dirsToSearch.toString());
    job.setLong(MIN_TXN, minTxn);
    job.setLong(MAX_TXN, maxTxn);
    LOG.debug("Setting minimum transaction to " + minTxn);
    LOG.debug("Setting maximume transaction to " + maxTxn);

    RunningJob rj = JobClient.runJob(job);
    LOG.info(
        "Submitted "
            + (isMajor ? CompactionType.MAJOR : CompactionType.MINOR)
            + " compaction job '"
            + jobName
            + "' with jobID="
            + rj.getID()
            + " to "
            + job.getQueueName()
            + " queue.  "
            + "(current delta dirs count="
            + dir.getCurrentDirectories().size()
            + ", obsolete delta dirs count="
            + dir.getObsolete().size());
    rj.waitForCompletion();
    su.gatherStats();
  }
Example #17
  @Override
  public void commitCreateTable(
      ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments) {
    HiveOutputTableHandle handle =
        checkType(tableHandle, HiveOutputTableHandle.class, "tableHandle");

    // verify no one raced us to create the target directory
    Path targetPath = new Path(handle.getTargetPath());

    // rename if using a temporary directory
    if (handle.hasTemporaryPath()) {
      if (pathExists(targetPath)) {
        SchemaTableName table = new SchemaTableName(handle.getSchemaName(), handle.getTableName());
        throw new PrestoException(
            HIVE_PATH_ALREADY_EXISTS,
            format(
                "Unable to commit creation of table '%s': target directory already exists: %s",
                table, targetPath));
      }
      // rename the temporary directory to the target
      rename(new Path(handle.getTemporaryPath()), targetPath);
    }

    // create the table in the metastore
    List<String> types =
        handle
            .getColumnTypes()
            .stream()
            .map(HiveType::toHiveType)
            .map(HiveType::getHiveTypeName)
            .collect(toList());

    boolean sampled = false;
    ImmutableList.Builder<FieldSchema> columns = ImmutableList.builder();
    for (int i = 0; i < handle.getColumnNames().size(); i++) {
      String name = handle.getColumnNames().get(i);
      String type = types.get(i);
      if (name.equals(SAMPLE_WEIGHT_COLUMN_NAME)) {
        columns.add(new FieldSchema(name, type, "Presto sample weight column"));
        sampled = true;
      } else {
        columns.add(new FieldSchema(name, type, null));
      }
    }

    HiveStorageFormat hiveStorageFormat = handle.getHiveStorageFormat();

    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(handle.getTableName());
    serdeInfo.setSerializationLib(hiveStorageFormat.getSerDe());
    serdeInfo.setParameters(ImmutableMap.<String, String>of());

    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation(targetPath.toString());
    sd.setCols(columns.build());
    sd.setSerdeInfo(serdeInfo);
    sd.setInputFormat(hiveStorageFormat.getInputFormat());
    sd.setOutputFormat(hiveStorageFormat.getOutputFormat());
    sd.setParameters(ImmutableMap.<String, String>of());

    Table table = new Table();
    table.setDbName(handle.getSchemaName());
    table.setTableName(handle.getTableName());
    table.setOwner(handle.getTableOwner());
    table.setTableType(TableType.MANAGED_TABLE.toString());
    String tableComment = "Created by Presto";
    if (sampled) {
      tableComment =
          "Sampled table created by Presto. Only query this table from Hive if you understand how Presto implements sampling.";
    }
    table.setParameters(ImmutableMap.of("comment", tableComment));
    table.setPartitionKeys(ImmutableList.<FieldSchema>of());
    table.setSd(sd);

    metastore.createTable(table);
  }
Example #18
  /**
   * Verifies Impala is able to properly parse delimiters in supported formats. See
   * HdfsStorageDescriptor.parseDelim() for details.
   */
  @Test
  public void testDelimiters() throws InvalidStorageDescriptorException {
    StorageDescriptor sd =
        HiveStorageDescriptorFactory.createSd(THdfsFileFormat.TEXT, RowFormat.DEFAULT_ROW_FORMAT);
    sd.setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM, "-2");
    assertNotNull(HdfsStorageDescriptor.fromStorageDescriptor("fakeTbl", sd));

    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM, "-128");
    assertNotNull(HdfsStorageDescriptor.fromStorageDescriptor("fakeTbl", sd));

    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM, "127");
    assertNotNull(HdfsStorageDescriptor.fromStorageDescriptor("fakeTbl", sd));

    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().putToParameters(serdeConstants.LINE_DELIM, "\001");
    assertNotNull(HdfsStorageDescriptor.fromStorageDescriptor("fakeTbl", sd));

    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM, "|");
    assertNotNull(HdfsStorageDescriptor.fromStorageDescriptor("fakeTbl", sd));

    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM, "\t");
    assertNotNull(HdfsStorageDescriptor.fromStorageDescriptor("fakeTbl", sd));

    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM, "ab");
    try {
      HdfsStorageDescriptor.fromStorageDescriptor("fake", sd);
      fail();
    } catch (HdfsStorageDescriptor.InvalidStorageDescriptorException e) {
      assertEquals(
          "Invalid delimiter: 'ab'. Delimiter must be specified as a "
              + "single character or as a decimal value in the range [-128:127]",
          e.getMessage());
    }

    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM, "128");
    try {
      HdfsStorageDescriptor.fromStorageDescriptor("fake", sd);
      fail();
    } catch (HdfsStorageDescriptor.InvalidStorageDescriptorException e) {
      assertEquals(
          "Invalid delimiter: '128'. Delimiter must be specified as a "
              + "single character or as a decimal value in the range [-128:127]",
          e.getMessage());
    }

    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM, "\128");
    try {
      HdfsStorageDescriptor.fromStorageDescriptor("fake", sd);
      fail();
    } catch (HdfsStorageDescriptor.InvalidStorageDescriptorException e) {
      assertEquals(
          "Invalid delimiter: '\128'. Delimiter must be specified as a "
              + "single character or as a decimal value in the range [-128:127]",
          e.getMessage());
    }

    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().putToParameters(serdeConstants.LINE_DELIM, "-129");
    try {
      HdfsStorageDescriptor.fromStorageDescriptor("fake", sd);
      fail();
    } catch (HdfsStorageDescriptor.InvalidStorageDescriptorException e) {
      assertEquals(
          "Invalid delimiter: '-129'. Delimiter must be specified as a "
              + "single character or as a decimal value in the range [-128:127]",
          e.getMessage());
    }
  }
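The error messages assert the rule the parser enforces: a delimiter must be a single character or a decimal value in the range [-128, 127]. A rough standalone restatement of that rule, not Impala's actual parseDelim() implementation:

  // Hypothetical sketch of the delimiter rule exercised by the test above.
  static Byte parseDelim(String delim) {
    if (delim.length() == 1) {
      return (byte) delim.charAt(0); // single-character form
    }
    try {
      int value = Integer.parseInt(delim);
      if (value >= Byte.MIN_VALUE && value <= Byte.MAX_VALUE) {
        return (byte) value; // decimal form in [-128, 127]
      }
    } catch (NumberFormatException e) {
      // not a decimal value either; fall through
    }
    return null; // invalid; the real code throws InvalidStorageDescriptorException
  }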
Example #19
  public boolean equals(StorageDescriptor that) {
    if (that == null) return false;

    boolean this_present_cols = true && this.isSetCols();
    boolean that_present_cols = true && that.isSetCols();
    if (this_present_cols || that_present_cols) {
      if (!(this_present_cols && that_present_cols)) return false;
      if (!this.cols.equals(that.cols)) return false;
    }

    boolean this_present_location = true && this.isSetLocation();
    boolean that_present_location = true && that.isSetLocation();
    if (this_present_location || that_present_location) {
      if (!(this_present_location && that_present_location)) return false;
      if (!this.location.equals(that.location)) return false;
    }

    boolean this_present_inputFormat = true && this.isSetInputFormat();
    boolean that_present_inputFormat = true && that.isSetInputFormat();
    if (this_present_inputFormat || that_present_inputFormat) {
      if (!(this_present_inputFormat && that_present_inputFormat)) return false;
      if (!this.inputFormat.equals(that.inputFormat)) return false;
    }

    boolean this_present_outputFormat = true && this.isSetOutputFormat();
    boolean that_present_outputFormat = true && that.isSetOutputFormat();
    if (this_present_outputFormat || that_present_outputFormat) {
      if (!(this_present_outputFormat && that_present_outputFormat)) return false;
      if (!this.outputFormat.equals(that.outputFormat)) return false;
    }

    boolean this_present_compressed = true;
    boolean that_present_compressed = true;
    if (this_present_compressed || that_present_compressed) {
      if (!(this_present_compressed && that_present_compressed)) return false;
      if (this.compressed != that.compressed) return false;
    }

    boolean this_present_numBuckets = true;
    boolean that_present_numBuckets = true;
    if (this_present_numBuckets || that_present_numBuckets) {
      if (!(this_present_numBuckets && that_present_numBuckets)) return false;
      if (this.numBuckets != that.numBuckets) return false;
    }

    boolean this_present_serdeInfo = true && this.isSetSerdeInfo();
    boolean that_present_serdeInfo = true && that.isSetSerdeInfo();
    if (this_present_serdeInfo || that_present_serdeInfo) {
      if (!(this_present_serdeInfo && that_present_serdeInfo)) return false;
      if (!this.serdeInfo.equals(that.serdeInfo)) return false;
    }

    boolean this_present_bucketCols = true && this.isSetBucketCols();
    boolean that_present_bucketCols = true && that.isSetBucketCols();
    if (this_present_bucketCols || that_present_bucketCols) {
      if (!(this_present_bucketCols && that_present_bucketCols)) return false;
      if (!this.bucketCols.equals(that.bucketCols)) return false;
    }

    boolean this_present_sortCols = true && this.isSetSortCols();
    boolean that_present_sortCols = true && that.isSetSortCols();
    if (this_present_sortCols || that_present_sortCols) {
      if (!(this_present_sortCols && that_present_sortCols)) return false;
      if (!this.sortCols.equals(that.sortCols)) return false;
    }

    boolean this_present_parameters = true && this.isSetParameters();
    boolean that_present_parameters = true && that.isSetParameters();
    if (this_present_parameters || that_present_parameters) {
      if (!(this_present_parameters && that_present_parameters)) return false;
      if (!this.parameters.equals(that.parameters)) return false;
    }

    return true;
  }
Example #20
  @Test
  public void testHiveLocalMetaStore() {

    // Create a table and display it back
    try {
      HiveMetaStoreClient hiveClient = new HiveMetaStoreClient(hiveLocalMetaStore.getHiveConf());

      hiveClient.dropTable(
          propertyParser.getProperty(ConfigVars.HIVE_TEST_DATABASE_NAME_KEY),
          propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY),
          true,
          true);

      // Define the cols
      List<FieldSchema> cols = new ArrayList<FieldSchema>();
      cols.add(new FieldSchema("id", serdeConstants.INT_TYPE_NAME, ""));
      cols.add(new FieldSchema("msg", serdeConstants.STRING_TYPE_NAME, ""));

      // Values for the StorageDescriptor
      String location =
          new File(propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY))
              .getAbsolutePath();
      String inputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat";
      String outputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat";
      int numBuckets = 16;
      Map<String, String> orcProps = new HashMap<String, String>();
      orcProps.put("orc.compress", "NONE");
      SerDeInfo serDeInfo =
          new SerDeInfo(OrcSerde.class.getSimpleName(), OrcSerde.class.getName(), orcProps);
      List<String> bucketCols = new ArrayList<String>();
      bucketCols.add("id");

      // Build the StorageDescriptor
      StorageDescriptor sd = new StorageDescriptor();
      sd.setCols(cols);
      sd.setLocation(location);
      sd.setInputFormat(inputFormat);
      sd.setOutputFormat(outputFormat);
      sd.setNumBuckets(numBuckets);
      sd.setSerdeInfo(serDeInfo);
      sd.setBucketCols(bucketCols);
      sd.setSortCols(new ArrayList<Order>());
      sd.setParameters(new HashMap<String, String>());

      // Define the table
      Table tbl = new Table();
      tbl.setDbName(propertyParser.getProperty(ConfigVars.HIVE_TEST_DATABASE_NAME_KEY));
      tbl.setTableName(propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY));
      tbl.setSd(sd);
      tbl.setOwner(System.getProperty("user.name"));
      tbl.setParameters(new HashMap<String, String>());
      tbl.setViewOriginalText("");
      tbl.setViewExpandedText("");
      tbl.setTableType(TableType.EXTERNAL_TABLE.name());
      List<FieldSchema> partitions = new ArrayList<FieldSchema>();
      partitions.add(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, ""));
      tbl.setPartitionKeys(partitions);

      // Create the table
      hiveClient.createTable(tbl);

      // Describe the table
      Table createdTable =
          hiveClient.getTable(
              propertyParser.getProperty(ConfigVars.HIVE_TEST_DATABASE_NAME_KEY),
              propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY));
      LOG.info("HIVE: Created Table: {}", createdTable.toString());
      assertThat(
          createdTable.toString(),
          containsString(propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY)));

    } catch (MetaException e) {
      e.printStackTrace();
    } catch (TException e) {
      e.printStackTrace();
    }
  }
Example #21
  @Override
  public final void createTable(final CatalogProtos.TableDescProto tableDescProto)
      throws CatalogException {
    HiveCatalogStoreClientPool.HiveCatalogStoreClient client = null;

    TableDesc tableDesc = new TableDesc(tableDescProto);
    String[] splitted = CatalogUtil.splitFQTableName(tableDesc.getName());
    String databaseName = splitted[0];
    String tableName = splitted[1];

    try {
      client = clientPool.getClient();

      org.apache.hadoop.hive.metastore.api.Table table =
          new org.apache.hadoop.hive.metastore.api.Table();
      table.setDbName(databaseName);
      table.setTableName(tableName);
      table.setParameters(
          new HashMap<String, String>(tableDesc.getMeta().getOptions().getAllKeyValus()));
      // TODO: set owner
      // table.setOwner();

      StorageDescriptor sd = new StorageDescriptor();
      sd.setSerdeInfo(new SerDeInfo());
      sd.getSerdeInfo().setParameters(new HashMap<String, String>());
      sd.getSerdeInfo().setName(table.getTableName());

      // If Tajo calls the set location method, the thrift client throws an exception as follows:
      // Caused by: MetaException(message:java.lang.NullPointerException)
      // If you want to modify the table path, you have to do it from the Hive CLI.
      if (tableDesc.isExternal()) {
        table.setTableType(TableType.EXTERNAL_TABLE.name());
        table.putToParameters("EXTERNAL", "TRUE");

        Path tablePath = new Path(tableDesc.getUri());
        FileSystem fs = tablePath.getFileSystem(conf);
        if (fs.isFile(tablePath)) {
          LOG.warn("A table path is a file, but HiveCatalogStore does not allow a file path.");
          sd.setLocation(tablePath.getParent().toString());
        } else {
          sd.setLocation(tablePath.toString());
        }
      }

      // set column information
      List<Column> columns = tableDesc.getSchema().getRootColumns();
      ArrayList<FieldSchema> cols = new ArrayList<FieldSchema>(columns.size());

      for (Column eachField : columns) {
        cols.add(
            new FieldSchema(
                eachField.getSimpleName(),
                HiveCatalogUtil.getHiveFieldType(eachField.getDataType()),
                ""));
      }
      sd.setCols(cols);

      // set partition keys
      if (tableDesc.hasPartition()
          && tableDesc.getPartitionMethod().getPartitionType().equals(PartitionType.COLUMN)) {
        List<FieldSchema> partitionKeys = new ArrayList<FieldSchema>();
        for (Column eachPartitionKey :
            tableDesc.getPartitionMethod().getExpressionSchema().getRootColumns()) {
          partitionKeys.add(
              new FieldSchema(
                  eachPartitionKey.getSimpleName(),
                  HiveCatalogUtil.getHiveFieldType(eachPartitionKey.getDataType()),
                  ""));
        }
        table.setPartitionKeys(partitionKeys);
      }

      if (tableDesc.getMeta().getStoreType().equalsIgnoreCase(BuiltinStorages.RCFILE)) {
        String serde = tableDesc.getMeta().getOption(StorageConstants.RCFILE_SERDE);
        sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName());
        sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName());
        if (StorageConstants.DEFAULT_TEXT_SERDE.equals(serde)) {
          sd.getSerdeInfo()
              .setSerializationLib(
                  org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName());
        } else {
          sd.getSerdeInfo()
              .setSerializationLib(
                  org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe.class.getName());
        }

        if (tableDesc.getMeta().getOptions().containsKey(StorageConstants.RCFILE_NULL)) {
          table.putToParameters(
              serdeConstants.SERIALIZATION_NULL_FORMAT,
              StringEscapeUtils.unescapeJava(
                  tableDesc.getMeta().getOption(StorageConstants.RCFILE_NULL)));
        }
      } else if (tableDesc.getMeta().getStoreType().equals(BuiltinStorages.TEXT)) {
        sd.getSerdeInfo()
            .setSerializationLib(
                org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
        sd.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class.getName());
        sd.setOutputFormat(
            org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat.class.getName());

        String fieldDelimiter =
            tableDesc
                .getMeta()
                .getOption(
                    StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);

        // A user may specify a unicode escape such as \u0001 or \001 as the field delimiter.
        // In that case the Java console converts the value into "\\u001" and Hive un-escapes
        // it again, so we have to un-escape the value here to preserve the intended delimiter.
        sd.getSerdeInfo()
            .putToParameters(
                serdeConstants.SERIALIZATION_FORMAT,
                StringEscapeUtils.unescapeJava(fieldDelimiter));
        sd.getSerdeInfo()
            .putToParameters(
                serdeConstants.FIELD_DELIM, StringEscapeUtils.unescapeJava(fieldDelimiter));
        table.getParameters().remove(StorageConstants.TEXT_DELIMITER);

        if (tableDesc.getMeta().containsOption(StorageConstants.TEXT_NULL)) {
          table.putToParameters(
              serdeConstants.SERIALIZATION_NULL_FORMAT,
              StringEscapeUtils.unescapeJava(
                  tableDesc.getMeta().getOption(StorageConstants.TEXT_NULL)));
          table.getParameters().remove(StorageConstants.TEXT_NULL);
        }
      } else if (tableDesc
          .getMeta()
          .getStoreType()
          .equalsIgnoreCase(BuiltinStorages.SEQUENCE_FILE)) {
        String serde = tableDesc.getMeta().getOption(StorageConstants.SEQUENCEFILE_SERDE);
        sd.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName());
        sd.setOutputFormat(
            org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat.class.getName());

        if (StorageConstants.DEFAULT_TEXT_SERDE.equals(serde)) {
          sd.getSerdeInfo()
              .setSerializationLib(
                  org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());

          String fieldDelimiter =
              tableDesc
                  .getMeta()
                  .getOption(
                      StorageConstants.SEQUENCEFILE_DELIMITER,
                      StorageConstants.DEFAULT_FIELD_DELIMITER);

          // A user may specify a unicode escape such as \u0001 or \001 as the field delimiter.
          // In that case the Java console converts the value into "\\u001" and Hive un-escapes
          // it again, so we have to un-escape the value here to preserve the intended delimiter.
          sd.getSerdeInfo()
              .putToParameters(
                  serdeConstants.SERIALIZATION_FORMAT,
                  StringEscapeUtils.unescapeJava(fieldDelimiter));
          sd.getSerdeInfo()
              .putToParameters(
                  serdeConstants.FIELD_DELIM, StringEscapeUtils.unescapeJava(fieldDelimiter));
          table.getParameters().remove(StorageConstants.SEQUENCEFILE_DELIMITER);
        } else {
          sd.getSerdeInfo()
              .setSerializationLib(
                  org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class.getName());
        }

        if (tableDesc.getMeta().containsOption(StorageConstants.SEQUENCEFILE_NULL)) {
          table.putToParameters(
              serdeConstants.SERIALIZATION_NULL_FORMAT,
              StringEscapeUtils.unescapeJava(
                  tableDesc.getMeta().getOption(StorageConstants.SEQUENCEFILE_NULL)));
          table.getParameters().remove(StorageConstants.SEQUENCEFILE_NULL);
        }
      } else {
        if (tableDesc.getMeta().getStoreType().equalsIgnoreCase(BuiltinStorages.PARQUET)) {
          sd.setInputFormat(parquet.hive.DeprecatedParquetInputFormat.class.getName());
          sd.setOutputFormat(parquet.hive.DeprecatedParquetOutputFormat.class.getName());
          sd.getSerdeInfo()
              .setSerializationLib(parquet.hive.serde.ParquetHiveSerDe.class.getName());
        } else {
          throw new UnsupportedException(
              tableDesc.getMeta().getStoreType() + " in HiveCatalogStore");
        }
      }

      sd.setSortCols(new ArrayList<Order>());

      table.setSd(sd);
      client.getHiveClient().createTable(table);
    } catch (Throwable t) {
      throw new TajoInternalError(t);
    } finally {
      if (client != null) client.release();
    }
  }
Example #22
  // TODO: this doesn't check if compaction is already running (even though Initiator does, but we
  // don't go through Initiator for user-initiated compactions)
  @Override
  public void run() {
    do {
      boolean launchedJob = false;
      // Make sure nothing escapes this run method and kills the metastore at large,
      // so wrap it in a big catch Throwable statement.
      try {
        final CompactionInfo ci = txnHandler.findNextToCompact(name);

        if (ci == null && !stop.get()) {
          try {
            Thread.sleep(SLEEP_TIME);
            continue;
          } catch (InterruptedException e) {
            LOG.warn("Worker thread sleep interrupted " + e.getMessage());
            continue;
          }
        }

        // Find the table we will be working with.
        Table t1 = null;
        try {
          t1 = resolveTable(ci);
          if (t1 == null) {
            LOG.info(
                "Unable to find table "
                    + ci.getFullTableName()
                    + ", assuming it was dropped and moving on.");
            txnHandler.markCleaned(ci);
            continue;
          }
        } catch (MetaException e) {
          txnHandler.markCleaned(ci);
          continue;
        }
        // This chicanery is to get around the fact that the table needs to be final in order to
        // go into the doAs below.
        final Table t = t1;

        // Find the partition we will be working with, if there is one.
        Partition p = null;
        try {
          p = resolvePartition(ci);
          if (p == null && ci.partName != null) {
            LOG.info(
                "Unable to find partition "
                    + ci.getFullPartitionName()
                    + ", assuming it was dropped and moving on.");
            txnHandler.markCleaned(ci);
            continue;
          }
        } catch (Exception e) {
          txnHandler.markCleaned(ci);
          continue;
        }

        // Find the appropriate storage descriptor
        final StorageDescriptor sd = resolveStorageDescriptor(t, p);

        // Check that the table or partition isn't sorted, as we don't yet support that.
        if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
          LOG.error("Attempt to compact sorted table, which is not yet supported!");
          txnHandler.markCleaned(ci);
          continue;
        }

        final boolean isMajor = ci.isMajorCompaction();
        final ValidTxnList txns =
            CompactionTxnHandler.createValidCompactTxnList(txnHandler.getOpenTxnsInfo());
        LOG.debug("ValidCompactTxnList: " + txns.writeToString());
        txnHandler.setCompactionHighestTxnId(ci, txns.getHighWatermark());
        final StringBuilder jobName = new StringBuilder(name);
        jobName.append("-compactor-");
        jobName.append(ci.getFullPartitionName());

        // Determine who to run as
        String runAs;
        if (ci.runAs == null) {
          runAs = findUserToRunAs(sd.getLocation(), t);
          txnHandler.setRunAs(ci.id, runAs);
        } else {
          runAs = ci.runAs;
        }

        LOG.info("Starting " + ci.type.toString() + " compaction for " + ci.getFullPartitionName());

        final StatsUpdater su =
            StatsUpdater.init(
                ci,
                txnHandler.findColumnsWithStats(ci),
                conf,
                runJobAsSelf(runAs) ? runAs : t.getOwner());
        final CompactorMR mr = new CompactorMR();
        launchedJob = true;
        try {
          if (runJobAsSelf(runAs)) {
            mr.run(conf, jobName.toString(), t, sd, txns, ci, su);
          } else {
            UserGroupInformation ugi =
                UserGroupInformation.createProxyUser(
                    t.getOwner(), UserGroupInformation.getLoginUser());
            ugi.doAs(
                new PrivilegedExceptionAction<Object>() {
                  @Override
                  public Object run() throws Exception {
                    mr.run(conf, jobName.toString(), t, sd, txns, ci, su);
                    return null;
                  }
                });
          }
          txnHandler.markCompacted(ci);
        } catch (Exception e) {
          LOG.error(
              "Caught exception while trying to compact "
                  + ci
                  + ".  Marking clean to avoid repeated failures, "
                  + StringUtils.stringifyException(e));
          txnHandler.markFailed(ci);
        }
      } catch (Throwable t) {
        LOG.error(
            "Caught an exception in the main loop of compactor worker "
                + name
                + ", "
                + StringUtils.stringifyException(t));
      }

      // If we didn't try to launch a job it either means there was no work to do or we got
      // here as the result of a communication failure with the DB.  Either way we want to wait
      // a bit before we restart the loop.
      if (!launchedJob && !stop.get()) {
        try {
          Thread.sleep(SLEEP_TIME);
        } catch (InterruptedException e) {
        }
      }
    } while (!stop.get());
  }
Example #23
  public int compareTo(StorageDescriptor other) {
    if (!getClass().equals(other.getClass())) {
      return getClass().getName().compareTo(other.getClass().getName());
    }

    int lastComparison = 0;
    StorageDescriptor typedOther = (StorageDescriptor) other;

    lastComparison = Boolean.valueOf(isSetCols()).compareTo(typedOther.isSetCols());
    if (lastComparison != 0) {
      return lastComparison;
    }
    if (isSetCols()) {
      lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.cols, typedOther.cols);
      if (lastComparison != 0) {
        return lastComparison;
      }
    }
    lastComparison = Boolean.valueOf(isSetLocation()).compareTo(typedOther.isSetLocation());
    if (lastComparison != 0) {
      return lastComparison;
    }
    if (isSetLocation()) {
      lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.location, typedOther.location);
      if (lastComparison != 0) {
        return lastComparison;
      }
    }
    lastComparison = Boolean.valueOf(isSetInputFormat()).compareTo(typedOther.isSetInputFormat());
    if (lastComparison != 0) {
      return lastComparison;
    }
    if (isSetInputFormat()) {
      lastComparison =
          org.apache.thrift.TBaseHelper.compareTo(this.inputFormat, typedOther.inputFormat);
      if (lastComparison != 0) {
        return lastComparison;
      }
    }
    lastComparison = Boolean.valueOf(isSetOutputFormat()).compareTo(typedOther.isSetOutputFormat());
    if (lastComparison != 0) {
      return lastComparison;
    }
    if (isSetOutputFormat()) {
      lastComparison =
          org.apache.thrift.TBaseHelper.compareTo(this.outputFormat, typedOther.outputFormat);
      if (lastComparison != 0) {
        return lastComparison;
      }
    }
    lastComparison = Boolean.valueOf(isSetCompressed()).compareTo(typedOther.isSetCompressed());
    if (lastComparison != 0) {
      return lastComparison;
    }
    if (isSetCompressed()) {
      lastComparison =
          org.apache.thrift.TBaseHelper.compareTo(this.compressed, typedOther.compressed);
      if (lastComparison != 0) {
        return lastComparison;
      }
    }
    lastComparison = Boolean.valueOf(isSetNumBuckets()).compareTo(typedOther.isSetNumBuckets());
    if (lastComparison != 0) {
      return lastComparison;
    }
    if (isSetNumBuckets()) {
      lastComparison =
          org.apache.thrift.TBaseHelper.compareTo(this.numBuckets, typedOther.numBuckets);
      if (lastComparison != 0) {
        return lastComparison;
      }
    }
    lastComparison = Boolean.valueOf(isSetSerdeInfo()).compareTo(typedOther.isSetSerdeInfo());
    if (lastComparison != 0) {
      return lastComparison;
    }
    if (isSetSerdeInfo()) {
      lastComparison =
          org.apache.thrift.TBaseHelper.compareTo(this.serdeInfo, typedOther.serdeInfo);
      if (lastComparison != 0) {
        return lastComparison;
      }
    }
    lastComparison = Boolean.valueOf(isSetBucketCols()).compareTo(typedOther.isSetBucketCols());
    if (lastComparison != 0) {
      return lastComparison;
    }
    if (isSetBucketCols()) {
      lastComparison =
          org.apache.thrift.TBaseHelper.compareTo(this.bucketCols, typedOther.bucketCols);
      if (lastComparison != 0) {
        return lastComparison;
      }
    }
    lastComparison = Boolean.valueOf(isSetSortCols()).compareTo(typedOther.isSetSortCols());
    if (lastComparison != 0) {
      return lastComparison;
    }
    if (isSetSortCols()) {
      lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sortCols, typedOther.sortCols);
      if (lastComparison != 0) {
        return lastComparison;
      }
    }
    lastComparison = Boolean.valueOf(isSetParameters()).compareTo(typedOther.isSetParameters());
    if (lastComparison != 0) {
      return lastComparison;
    }
    if (isSetParameters()) {
      lastComparison =
          org.apache.thrift.TBaseHelper.compareTo(this.parameters, typedOther.parameters);
      if (lastComparison != 0) {
        return lastComparison;
      }
    }
    return 0;
  }