Example #1
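Converts a Thrift org.apache.hadoop.hive.metastore.api.Partition into the connector's internal Partition model: database, table, partition values, columns, and parameters are copied through a builder, and a PrestoException with HIVE_INVALID_METADATA is thrown when the partition carries no storage descriptor.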
  public static Partition fromMetastoreApiPartition(
      org.apache.hadoop.hive.metastore.api.Partition partition) {
    StorageDescriptor storageDescriptor = partition.getSd();
    if (storageDescriptor == null) {
      throw new PrestoException(
          HIVE_INVALID_METADATA, "Partition does not contain a storage descriptor: " + partition);
    }

    Partition.Builder partitionBuilder =
        Partition.builder()
            .setDatabaseName(partition.getDbName())
            .setTableName(partition.getTableName())
            .setValues(partition.getValues())
            .setColumns(
                storageDescriptor
                    .getCols()
                    .stream()
                    .map(MetastoreUtil::fromMetastoreApiFieldSchema)
                    .collect(toList()))
            .setParameters(partition.getParameters());

    fromMetastoreApiStorageDescriptor(
        storageDescriptor,
        partitionBuilder.getStorageBuilder(),
        format("%s.%s", partition.getTableName(), partition.getValues()));

    return partitionBuilder.build();
  }
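For orientation, here is a minimal, hypothetical sketch of exercising this converter with a hand-built Thrift partition. The Thrift types and their setters are the real Hive metastore API; the wrapper class, the database/table names, and the assumption that MetastoreUtil and the internal Partition type can be imported from the surrounding codebase are illustrative only.

import java.util.Collections;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;

// The internal Partition and MetastoreUtil types are assumed to be importable from the
// surrounding codebase; their package is deliberately left out here.
public class PartitionConversionSketch {
  public static void main(String[] args) {
    // Build the Thrift-side storage descriptor; leaving it null would trigger the
    // HIVE_INVALID_METADATA error shown in fromMetastoreApiPartition above.
    SerDeInfo serde = new SerDeInfo();
    serde.setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
    serde.setParameters(Collections.emptyMap());

    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(Collections.singletonList(new FieldSchema("line", "string", null)));
    sd.setLocation("hdfs://namenode/warehouse/web.db/logs/ds=2016-01-01");
    sd.setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
    sd.setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
    sd.setSerdeInfo(serde);

    org.apache.hadoop.hive.metastore.api.Partition thriftPartition =
        new org.apache.hadoop.hive.metastore.api.Partition();
    thriftPartition.setDbName("web");
    thriftPartition.setTableName("logs");
    thriftPartition.setValues(Collections.singletonList("2016-01-01"));
    thriftPartition.setParameters(Collections.emptyMap());
    thriftPartition.setSd(sd);

    // Convert to the internal model; the builder copies values, columns, and parameters.
    Partition partition = MetastoreUtil.fromMetastoreApiPartition(thriftPartition);
    System.out.println(partition.getValues());
  }
}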
Example #2
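Performs the analogous conversion for tables: it maps data and partition columns, owner, table type, view texts, and parameters (defaulting to an empty map), again rejecting a table whose storage descriptor is missing.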
  public static Table fromMetastoreApiTable(org.apache.hadoop.hive.metastore.api.Table table) {
    StorageDescriptor storageDescriptor = table.getSd();
    if (storageDescriptor == null) {
      throw new PrestoException(HIVE_INVALID_METADATA, "Table is missing storage descriptor");
    }

    Table.Builder tableBuilder =
        Table.builder()
            .setDatabaseName(table.getDbName())
            .setTableName(table.getTableName())
            .setOwner(nullToEmpty(table.getOwner()))
            .setTableType(table.getTableType())
            .setDataColumns(
                storageDescriptor
                    .getCols()
                    .stream()
                    .map(MetastoreUtil::fromMetastoreApiFieldSchema)
                    .collect(toList()))
            .setPartitionColumns(
                table
                    .getPartitionKeys()
                    .stream()
                    .map(MetastoreUtil::fromMetastoreApiFieldSchema)
                    .collect(toList()))
            .setParameters(
                table.getParameters() == null ? ImmutableMap.of() : table.getParameters())
            .setViewOriginalText(Optional.ofNullable(emptyToNull(table.getViewOriginalText())))
            .setViewExpandedText(Optional.ofNullable(emptyToNull(table.getViewExpandedText())));

    fromMetastoreApiStorageDescriptor(
        storageDescriptor, tableBuilder.getStorageBuilder(), table.getTableName());

    return tableBuilder.build();
  }
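A similar hedged sketch for tables, this time fetching the Thrift object over the metastore Thrift API before converting it. HiveMetaStoreClient, getTable, and METASTOREURIS are standard Hive classes and settings; the metastore host, the database and table names, and the availability of MetastoreUtil and the internal Table type on the classpath are assumptions.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;

// Table and MetastoreUtil refer to the internal types used above; their package is
// assumed to be available and is left out here.
public class TableConversionSketch {
  public static void main(String[] args) throws Exception {
    HiveConf conf = new HiveConf();
    conf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://metastore-host:9083");

    HiveMetaStoreClient client = new HiveMetaStoreClient(conf);
    try {
      // Fetch the Thrift-side table and run it through the converter above.
      org.apache.hadoop.hive.metastore.api.Table thriftTable = client.getTable("web", "logs");
      Table table = MetastoreUtil.fromMetastoreApiTable(thriftTable);
      System.out.println(table.getTableName() + ": "
          + table.getDataColumns().size() + " data columns, "
          + table.getPartitionColumns().size() + " partition columns");
    } finally {
      client.close();
    }
  }
}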
Example #3
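Walks the indexes defined on a table and, for each index that satisfies the rewrite criteria, records the set of its key column names. The schema of the backing index table is checked (the bucket and offsets-array columns must be present) so that the rewrite fails loudly if the AggregateIndexHandler layout ever changes.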
 /**
  * Iterates over the indexes on the table and populates the indexToKeys map for all the
  * indexes that satisfy the rewrite criteria.
  *
  * @param indexTables indexes defined on the table being rewritten
  * @return map from each usable index to the names of its index key columns
  * @throws SemanticException if the index table for an index cannot be located
  */
 Map<Index, Set<String>> getIndexToKeysMap(List<Index> indexTables) throws SemanticException {
   Index index = null;
   Hive hiveInstance = hiveDb;
   Map<Index, Set<String>> indexToKeysMap = new LinkedHashMap<Index, Set<String>>();
   for (int idxCtr = 0; idxCtr < indexTables.size(); idxCtr++) {
     final Set<String> indexKeyNames = new LinkedHashSet<String>();
     index = indexTables.get(idxCtr);
     // Getting index key columns
     StorageDescriptor sd = index.getSd();
     List<FieldSchema> idxColList = sd.getCols();
     for (FieldSchema fieldSchema : idxColList) {
       indexKeyNames.add(fieldSchema.getName());
     }
     assert indexKeyNames.size() == 1;
     // Check that the index schema is as expected. This code block should
     // catch problems of this rewrite breaking when the AggregateIndexHandler
     // index is changed.
     List<String> idxTblColNames = new ArrayList<String>();
     try {
       Table idxTbl = hiveInstance.getTable(index.getDbName(), index.getIndexTableName());
       for (FieldSchema idxTblCol : idxTbl.getCols()) {
         idxTblColNames.add(idxTblCol.getName());
       }
     } catch (HiveException e) {
       LOG.error(
           "Got exception while locating index table, "
               + "skipping "
               + getName()
               + " optimization");
       LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
       throw new SemanticException(e.getMessage(), e);
     }
     assert (idxTblColNames.contains(IDX_BUCKET_COL));
     assert (idxTblColNames.contains(IDX_OFFSETS_ARRAY_COL));
     // We add all index tables that can be used for the rewrite and defer the decision of
     // using a particular index until later; this allows choosing an index if a better
     // selection mechanism is designed later.
     indexToKeysMap.put(index, indexKeyNames);
   }
   return indexToKeysMap;
 }
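A hypothetical call site for this method, written as it might appear inside the same optimizer class: hiveDb, LOG, and getIndexToKeysMap are the field and method shown above, Hive.getIndexes is the standard metadata API, and the database/table names are made up for illustration.

// Load all indexes on the table via the Hive metadata API, then keep only those that
// getIndexToKeysMap accepts for the rewrite.
try {
  List<Index> indexesOnTable = hiveDb.getIndexes("web", "logs", (short) -1);
  Map<Index, Set<String>> usableIndexes = getIndexToKeysMap(indexesOnTable);
  for (Map.Entry<Index, Set<String>> entry : usableIndexes.entrySet()) {
    LOG.info("Index " + entry.getKey().getIndexName()
        + " can drive the rewrite with key(s) " + entry.getValue());
  }
} catch (HiveException e) {
  throw new SemanticException(e.getMessage(), e);
}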
Example #4
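Configures and submits the compactor MapReduce job: it copies table and storage-descriptor properties into the JobConf, uses AcidUtils.getAcidState to determine the base and delta directories to read, records the minimum and maximum transaction IDs for the mapper, runs the job on the configured queue, waits for completion, and finally triggers a statistics update.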
  /**
   * Run a compactor job.
   *
   * @param conf Hive configuration
   * @param jobName name to run this job with
   * @param t metastore table
   * @param sd metastore storage descriptor
   * @param txns list of valid transactions
   * @param isMajor is this a major compaction?
   * @throws java.io.IOException if the job fails
   */
  void run(
      HiveConf conf,
      String jobName,
      Table t,
      StorageDescriptor sd,
      ValidTxnList txns,
      boolean isMajor,
      Worker.StatsUpdater su)
      throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName(jobName);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setJarByClass(CompactorMR.class);
    LOG.debug("User jar set to " + job.getJar());
    job.setMapperClass(CompactorMap.class);
    job.setNumReduceTasks(0);
    job.setInputFormat(CompactorInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setOutputCommitter(CompactorOutputCommitter.class);

    String queueName = conf.getVar(HiveConf.ConfVars.COMPACTOR_JOB_QUEUE);
    if (queueName != null && queueName.length() > 0) {
      job.setQueueName(queueName);
    }

    job.set(FINAL_LOCATION, sd.getLocation());
    job.set(TMP_LOCATION, sd.getLocation() + "/" + TMPDIR + "_" + UUID.randomUUID().toString());
    job.set(INPUT_FORMAT_CLASS_NAME, sd.getInputFormat());
    job.set(OUTPUT_FORMAT_CLASS_NAME, sd.getOutputFormat());
    job.setBoolean(IS_MAJOR, isMajor);
    job.setBoolean(IS_COMPRESSED, sd.isCompressed());
    job.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString());
    job.setInt(NUM_BUCKETS, sd.getNumBuckets());
    job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString());
    setColumnTypes(job, sd.getCols());

    // Figure out and encode what files we need to read.  We do this here (rather than in
    // getSplits below) because as part of this we discover our minimum and maximum transactions,
    // and discovering that in getSplits is too late as we then have no way to pass it to our
    // mapper.

    AcidUtils.Directory dir = AcidUtils.getAcidState(new Path(sd.getLocation()), conf, txns, false);
    StringableList dirsToSearch = new StringableList();
    Path baseDir = null;
    if (isMajor) {
      // There may not be a base dir if the partition was empty before inserts or if this
      // partition is just now being converted to ACID.
      baseDir = dir.getBaseDirectory();
      if (baseDir == null) {
        List<HdfsFileStatusWithId> originalFiles = dir.getOriginalFiles();
        if (originalFiles != null && !originalFiles.isEmpty()) {
          // There are original format files
          for (HdfsFileStatusWithId stat : originalFiles) {
            Path path = stat.getFileStatus().getPath();
            dirsToSearch.add(path);
            LOG.debug("Adding original file " + path + " to dirs to search");
          }
          // Set base to the location so that the input format reads the original files.
          baseDir = new Path(sd.getLocation());
        }
      } else {
        // add our base to the list of directories to search for files in.
        LOG.debug("Adding base directory " + baseDir + " to dirs to search");
        dirsToSearch.add(baseDir);
      }
    }

    List<AcidUtils.ParsedDelta> parsedDeltas = dir.getCurrentDirectories();

    if (parsedDeltas == null || parsedDeltas.isEmpty()) {
      // Seriously, no deltas?  Can't compact that.
      LOG.error("No delta files found to compact in " + sd.getLocation());
      return;
    }

    StringableList deltaDirs = new StringableList();
    long minTxn = Long.MAX_VALUE;
    long maxTxn = Long.MIN_VALUE;
    for (AcidUtils.ParsedDelta delta : parsedDeltas) {
      LOG.debug("Adding delta " + delta.getPath() + " to directories to search");
      dirsToSearch.add(delta.getPath());
      deltaDirs.add(delta.getPath());
      minTxn = Math.min(minTxn, delta.getMinTransaction());
      maxTxn = Math.max(maxTxn, delta.getMaxTransaction());
    }

    if (baseDir != null) job.set(BASE_DIR, baseDir.toString());
    job.set(DELTA_DIRS, deltaDirs.toString());
    job.set(DIRS_TO_SEARCH, dirsToSearch.toString());
    job.setLong(MIN_TXN, minTxn);
    job.setLong(MAX_TXN, maxTxn);
    LOG.debug("Setting minimum transaction to " + minTxn);
    LOG.debug("Setting maximume transaction to " + maxTxn);

    RunningJob rj = JobClient.runJob(job);
    LOG.info(
        "Submitted "
            + (isMajor ? CompactionType.MAJOR : CompactionType.MINOR)
            + " compaction job '"
            + jobName
            + "' with jobID="
            + rj.getID()
            + " to "
            + job.getQueueName()
            + " queue.  "
            + "(current delta dirs count="
            + dir.getCurrentDirectories().size()
            + ", obsolete delta dirs count="
            + dir.getObsolete());
    rj.waitForCompletion();
    su.gatherStats();
  }
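One detail worth spelling out is why the transaction list travels as a plain string: ValidTxnList serializes itself via toString() and is rebuilt from that string on the mapper side. A minimal sketch, assuming the standard ValidReadTxnList implementation (its no-arg constructor treats every transaction as valid):

import org.apache.hadoop.hive.common.ValidReadTxnList;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.mapred.JobConf;

public class ValidTxnRoundTripSketch {
  public static void main(String[] args) {
    ValidTxnList txns = new ValidReadTxnList(); // no open or aborted transactions

    JobConf job = new JobConf();
    job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString()); // same call as in run() above

    // The mapper side can rebuild an equivalent list from the stored string.
    ValidTxnList restored = new ValidReadTxnList(job.get(ValidTxnList.VALID_TXNS_KEY));
    System.out.println(restored.isTxnValid(42)); // true: every transaction is visible
  }
}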