Example #1
0
  /**
   * Copies the storage-related fields of a metastore {@code StorageDescriptor} into the supplied
   * {@code Storage.Builder}: storage format, location, bucket property, sorted/skewed flags and
   * SerDe parameters.
   *
   * @param storageDescriptor metastore descriptor to read from
   * @param builder builder that receives the extracted values
   * @param tablePartitionName name forwarded to {@code HiveBucketProperty.fromStorageDescriptor}
   * @throws PrestoException with {@code HIVE_INVALID_METADATA} when the descriptor carries no
   *     SerDe info
   */
  private static void fromMetastoreApiStorageDescriptor(
      StorageDescriptor storageDescriptor, Storage.Builder builder, String tablePartitionName) {
    SerDeInfo serde = storageDescriptor.getSerdeInfo();
    if (serde == null) {
      throw new PrestoException(
          HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }

    // Values derived from plain getters are gathered up front.
    StorageFormat format =
        StorageFormat.createNullable(
            serde.getSerializationLib(),
            storageDescriptor.getInputFormat(),
            storageDescriptor.getOutputFormat());
    String location = nullToEmpty(storageDescriptor.getLocation());

    // Sorted only when sort columns are set and non-empty.
    boolean sorted = false;
    if (storageDescriptor.isSetSortCols()) {
      sorted = !storageDescriptor.getSortCols().isEmpty();
    }

    // Skewed only when skewed info and its column names are set and non-empty.
    boolean skewed = false;
    if (storageDescriptor.isSetSkewedInfo()
        && storageDescriptor.getSkewedInfo().isSetSkewedColNames()) {
      skewed = !storageDescriptor.getSkewedInfo().getSkewedColNames().isEmpty();
    }

    // The bucket property is still computed inside the chain, after format and location are set.
    builder
        .setStorageFormat(format)
        .setLocation(location)
        .setBucketProperty(
            HiveBucketProperty.fromStorageDescriptor(storageDescriptor, tablePartitionName))
        .setSorted(sorted)
        .setSkewed(skewed)
        .setSerdeParameters(
            serde.getParameters() != null ? serde.getParameters() : ImmutableMap.of());
  }
Example #2
0
  // TODO: this doesn't check if compaction is already running (even though Initiator does, we
  // don't go through Initiator for user-initiated compactions).
  /**
   * Main loop of the compactor worker.
   *
   * <p>Each iteration asks {@code txnHandler} for the next compaction request, resolves the table,
   * optional partition and storage descriptor it refers to, and runs a {@code CompactorMR} job for
   * it, either directly or through a proxy user for the table owner. The request is marked
   * compacted on success and failed if the job throws. Requests whose table or partition cannot be
   * resolved, or whose storage descriptor has sort columns, are marked cleaned and skipped.
   *
   * <p>The loop keeps running until {@code stop} is set. Any {@code Throwable} raised inside an
   * iteration is logged and swallowed so that it cannot escape this method.
   */
  @Override
  public void run() {
    do {
      boolean launchedJob = false;
      // Make sure nothing escapes this run method and kills the metastore at large,
      // so wrap it in a big catch Throwable statement.
      try {
        final CompactionInfo ci = txnHandler.findNextToCompact(name);

        if (ci == null) {
          // Nothing to compact. Only sleep when we are not shutting down; in either case jump to
          // the loop condition so a null CompactionInfo is never dereferenced below.
          if (!stop.get()) {
            try {
              Thread.sleep(SLEEP_TIME);
            } catch (InterruptedException e) {
              LOG.warn("Worker thread sleep interrupted " + e.getMessage());
            }
          }
          continue;
        }

        // Find the table we will be working with.
        Table t1 = null;
        try {
          t1 = resolveTable(ci);
          if (t1 == null) {
            LOG.info(
                "Unable to find table "
                    + ci.getFullTableName()
                    + ", assuming it was dropped and moving on.");
            txnHandler.markCleaned(ci);
            continue;
          }
        } catch (MetaException e) {
          // Record why the request is being dropped instead of discarding the exception silently.
          LOG.warn(
              "Unable to resolve table "
                  + ci.getFullTableName()
                  + ", marking cleaned and moving on, "
                  + StringUtils.stringifyException(e));
          txnHandler.markCleaned(ci);
          continue;
        }
        // This chicanery is to get around the fact that the table needs to be final in order to
        // go into the doAs below.
        final Table t = t1;

        // Find the partition we will be working with, if there is one.
        Partition p = null;
        try {
          p = resolvePartition(ci);
          if (p == null && ci.partName != null) {
            LOG.info(
                "Unable to find partition "
                    + ci.getFullPartitionName()
                    + ", assuming it was dropped and moving on.");
            txnHandler.markCleaned(ci);
            continue;
          }
        } catch (Exception e) {
          // Record why the request is being dropped instead of discarding the exception silently.
          LOG.warn(
              "Unable to resolve partition "
                  + ci.getFullPartitionName()
                  + ", marking cleaned and moving on, "
                  + StringUtils.stringifyException(e));
          txnHandler.markCleaned(ci);
          continue;
        }

        // Find the appropriate storage descriptor
        final StorageDescriptor sd = resolveStorageDescriptor(t, p);

        // Check that the table or partition isn't sorted, as we don't yet support that.
        if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
          LOG.error("Attempt to compact sorted table, which is not yet supported!");
          txnHandler.markCleaned(ci);
          continue;
        }

        final boolean isMajor = ci.isMajorCompaction();
        final ValidTxnList txns =
            CompactionTxnHandler.createValidCompactTxnList(txnHandler.getOpenTxnsInfo());
        LOG.debug("ValidCompactTxnList: " + txns.writeToString());
        txnHandler.setCompactionHighestTxnId(ci, txns.getHighWatermark());
        final StringBuilder jobName = new StringBuilder(name);
        jobName.append("-compactor-");
        jobName.append(ci.getFullPartitionName());

        // Determine who to run as
        String runAs;
        if (ci.runAs == null) {
          runAs = findUserToRunAs(sd.getLocation(), t);
          txnHandler.setRunAs(ci.id, runAs);
        } else {
          runAs = ci.runAs;
        }

        LOG.info("Starting " + ci.type.toString() + " compaction for " + ci.getFullPartitionName());

        final StatsUpdater su =
            StatsUpdater.init(
                ci,
                txnHandler.findColumnsWithStats(ci),
                conf,
                runJobAsSelf(runAs) ? runAs : t.getOwner());
        final CompactorMR mr = new CompactorMR();
        // From here on we count as having attempted a job, so the back-off sleep at the bottom of
        // the loop is skipped regardless of the outcome.
        launchedJob = true;
        try {
          if (runJobAsSelf(runAs)) {
            mr.run(conf, jobName.toString(), t, sd, txns, ci, su);
          } else {
            // Run the job through a proxy user for the table owner.
            UserGroupInformation ugi =
                UserGroupInformation.createProxyUser(
                    t.getOwner(), UserGroupInformation.getLoginUser());
            ugi.doAs(
                new PrivilegedExceptionAction<Object>() {
                  @Override
                  public Object run() throws Exception {
                    mr.run(conf, jobName.toString(), t, sd, txns, ci, su);
                    return null;
                  }
                });
          }
          txnHandler.markCompacted(ci);
        } catch (Exception e) {
          // The message now matches the action taken: the request is marked failed, not cleaned.
          LOG.error(
              "Caught exception while trying to compact "
                  + ci
                  + ".  Marking failed to avoid repeated failures, "
                  + StringUtils.stringifyException(e));
          txnHandler.markFailed(ci);
        }
      } catch (Throwable t) {
        LOG.error(
            "Caught an exception in the main loop of compactor worker "
                + name
                + ", "
                + StringUtils.stringifyException(t));
      }

      // If we didn't try to launch a job it either means there was no work to do or we got
      // here as the result of a communication failure with the DB.  Either way we want to wait
      // a bit before we restart the loop.
      if (!launchedJob && !stop.get()) {
        try {
          Thread.sleep(SLEEP_TIME);
        } catch (InterruptedException ignored) {
          // Deliberately ignored: an interrupt only cuts the back-off short. Termination is
          // driven by the stop flag checked in the loop condition. The interrupt flag is not
          // restored because that would make every later sleep throw immediately and spin.
        }
      }
    } while (!stop.get());
  }