Example #1
  private Map<HivePartitionName, Optional<Partition>> loadPartitionsByNames(
      Iterable<? extends HivePartitionName> partitionNames) throws Exception {
    requireNonNull(partitionNames, "partitionNames is null");
    checkArgument(!Iterables.isEmpty(partitionNames), "partitionNames is empty");

    HivePartitionName firstPartition = Iterables.get(partitionNames, 0);

    HiveTableName hiveTableName = firstPartition.getHiveTableName();
    String databaseName = hiveTableName.getDatabaseName();
    String tableName = hiveTableName.getTableName();

    List<String> partitionsToFetch = new ArrayList<>();
    for (HivePartitionName partitionName : partitionNames) {
      checkArgument(
          partitionName.getHiveTableName().equals(hiveTableName),
          "Expected table name %s but got %s",
          hiveTableName,
          partitionName.getHiveTableName());
      partitionsToFetch.add(partitionName.getPartitionName());
    }

    List<String> partitionColumnNames =
        ImmutableList.copyOf(
            Warehouse.makeSpecFromName(firstPartition.getPartitionName()).keySet());

    try {
      return retry()
          .stopOn(NoSuchObjectException.class)
          .stopOnIllegalExceptions()
          .run(
              "getPartitionsByNames",
              stats
                  .getGetPartitionsByNames()
                  .wrap(
                      () -> {
                        try (HiveMetastoreClient client = clientProvider.createMetastoreClient()) {
                          ImmutableMap.Builder<HivePartitionName, Optional<Partition>> partitions =
                              ImmutableMap.builder();
                          for (Partition partition :
                              client.getPartitionsByNames(
                                  databaseName, tableName, partitionsToFetch)) {
                            String partitionId =
                                FileUtils.makePartName(
                                    partitionColumnNames, partition.getValues(), null);
                            partitions.put(
                                HivePartitionName.partition(databaseName, tableName, partitionId),
                                Optional.of(partition));
                          }
                          return partitions.build();
                        }
                      }));
    } catch (NoSuchObjectException e) {
      // assume none of the partitions in the batch are available
      return stream(partitionNames.spliterator(), false)
          .collect(toMap(identity(), name -> Optional.empty()));
    } catch (TException e) {
      throw new PrestoException(HIVE_METASTORE_ERROR, e);
    }
  }
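The loader above rebuilds each result's cache key by round-tripping the partition name: Warehouse.makeSpecFromName parses the first name into an ordered column-to-value map, and FileUtils.makePartName reassembles a canonical name from each Partition's raw values. A minimal sketch of that round trip, assuming Java 11+ with the Hive metastore and common jars on the classpath (the partition name and values are hypothetical):

import java.util.List;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.metastore.Warehouse;

public class PartNameRoundTrip {
  public static void main(String[] args) throws Exception {
    // A partition name as the metastore returns it, for partition columns (ds, hr).
    String name = "ds=2023-10-01/hr=07";

    // makeSpecFromName parses the name into a LinkedHashMap, so its key set
    // preserves the partition column order.
    List<String> columns = List.copyOf(Warehouse.makeSpecFromName(name).keySet()); // [ds, hr]

    // makePartName rebuilds a canonical, escaped name from columns and values;
    // this is how the loader derives a stable partitionId from Partition.getValues().
    System.out.println(FileUtils.makePartName(columns, List.of("2023-10-01", "07")));
    // prints: ds=2023-10-01/hr=07
  }
}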
Example #2
  /** Create a map-reduce scratch directory on demand and return it. */
  public String getMRScratchDir() {

    // if we are executing entirely on the client side - then
    // just (re)use the local scratch directory
    if (isLocalOnlyExecutionMode()) {
      return getLocalScratchDir(!explain);
    }

    try {
      Path dir = FileUtils.makeQualified(nonLocalScratchPath, conf);
      URI uri = dir.toUri();
      return getScratchDir(uri.getScheme(), uri.getAuthority(), !explain, uri.getPath());

    } catch (IOException e) {
      throw new RuntimeException(e);
    } catch (IllegalArgumentException e) {
      throw new RuntimeException(
          "Error while making MR scratch directory - check filesystem config ("
              + e.getCause() + ")",
          e);
    }
  }
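FileUtils.makeQualified fills in the scheme and authority of a scheme-less path from fs.defaultFS, which is what lets getMRScratchDir split the qualified URI back into (scheme, authority, path) for getScratchDir. A small sketch, assuming the Hadoop and Hive common jars are on the classpath (the namenode address is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;

public class QualifyDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://namenode:8020"); // hypothetical cluster address

    // A scheme-less absolute path, as a configured scratch dir typically is.
    Path qualified = FileUtils.makeQualified(new Path("/tmp/hive"), conf);

    System.out.println(qualified);                        // hdfs://namenode:8020/tmp/hive
    System.out.println(qualified.toUri().getScheme());    // hdfs
    System.out.println(qualified.toUri().getAuthority()); // namenode:8020
  }
}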
Example #3
  private static String makePartName(List<FieldSchema> partitionColumns, List<String> values) {
    checkArgument(
        partitionColumns.size() == values.size(),
        "partitionColumns size (%s) does not match values size (%s)",
        partitionColumns.size(),
        values.size());
    List<String> partitionColumnNames =
        partitionColumns.stream().map(FieldSchema::getName).collect(toList());
    return FileUtils.makePartName(partitionColumnNames, values);
  }
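A hypothetical call site inside the same class: the FieldSchema list mirrors how the metastore describes partition columns, and the values must be given in the same order.

// Hypothetical call site (List.of requires Java 9+).
List<FieldSchema> partitionColumns = List.of(
    new FieldSchema("ds", "string", null),   // name, type, comment
    new FieldSchema("hr", "string", null));

String partName = makePartName(partitionColumns, List.of("2023-10-01", "07"));
// partName is "ds=2023-10-01/hr=07"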
Example #4
  @Override
  public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
    try {
      OutputJobInfo jobInfo =
          (OutputJobInfo)
              HCatUtil.deserialize(
                  tableDesc.getJobProperties().get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
      String parentPath = jobInfo.getTableInfo().getTableLocation();
      String dynHash = tableDesc.getJobProperties().get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID);
      String idHash = tableDesc.getJobProperties().get(HCatConstants.HCAT_OUTPUT_ID_HASH);
      boolean isExternal = Boolean.parseBoolean((String) tableDesc.getProperties().get("EXTERNAL"));

      // For dynamic partitioned writes without all key values specified,
      // we create a temp dir for the associated write job
      if (dynHash != null) {
        // if external table and custom root specified, update the parent path
        if (isExternal
            && jobInfo.getCustomDynamicRoot() != null
            && jobInfo.getCustomDynamicRoot().length() > 0) {
          parentPath = new Path(parentPath, jobInfo.getCustomDynamicRoot()).toString();
        }
        parentPath =
            new Path(parentPath, FileOutputCommitterContainer.DYNTEMP_DIR_NAME + dynHash)
                .toString();
      } else {
        parentPath =
            new Path(parentPath, FileOutputCommitterContainer.SCRATCH_DIR_NAME + idHash).toString();
      }

      String outputLocation;

      if (dynHash != null
          && isExternal
          && jobInfo.getCustomDynamicPath() != null
          && jobInfo.getCustomDynamicPath().length() > 0) {
        // dynamic partitioning with a custom path; resolve the custom path
        // using partition column values
        outputLocation = HCatFileUtil.resolveCustomPath(jobInfo, null, true);
      } else if (dynHash == null
          && isExternal
          && jobInfo.getLocation() != null
          && jobInfo.getLocation().length() > 0) {
        // honor the custom location for an external table over what the metadata specifies
        outputLocation = jobInfo.getLocation();
      } else if (dynHash == null && jobInfo.getPartitionValues().isEmpty()) {
        // Unpartitioned table; writing to the scratch dir directly is good enough.
        outputLocation = "";
      } else {
        List<String> cols = new ArrayList<>();
        List<String> values = new ArrayList<>();

        // Build the output location in the order partition keys are defined for the table.
        for (String name : jobInfo.getTableInfo().getPartitionColumns().getFieldNames()) {
          String value = jobInfo.getPartitionValues().get(name);
          cols.add(name);
          values.add(value);
        }
        outputLocation = FileUtils.makePartName(cols, values);
      }

      if (outputLocation != null && !outputLocation.isEmpty()) {
        jobInfo.setLocation(new Path(parentPath, outputLocation).toString());
      } else {
        jobInfo.setLocation(new Path(parentPath).toString());
      }

      // only set output dir if partition is fully materialized
      if (jobInfo.getPartitionValues().size()
          == jobInfo.getTableInfo().getPartitionColumns().size()) {
        jobProperties.put("mapred.output.dir", jobInfo.getLocation());
      }

      SpecialCases.addSpecialCasesParametersToOutputJobProperties(jobProperties, jobInfo, ofClass);

      jobProperties.put(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(jobInfo));
    } catch (IOException e) {
      throw new IllegalStateException("Failed to set output path", e);
    }
  }
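For the static-partition branch, the resulting layout is the scratch directory under the table root with the partition subpath appended. A minimal sketch of that path assembly, assuming the Hadoop and Hive common jars are on the classpath; the table location, id hash, and the "_SCRATCH" prefix (the assumed value of FileOutputCommitterContainer.SCRATCH_DIR_NAME) are illustrative, not taken from the source:

import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;

public class OutputLocationDemo {
  public static void main(String[] args) {
    String tableLocation = "hdfs://nn:8020/warehouse/db.db/t"; // hypothetical table root
    String idHash = "12345";                                   // hypothetical output id hash

    // Scratch dir under the table root ("_SCRATCH" is the assumed scratch-dir prefix).
    Path parentPath = new Path(tableLocation, "_SCRATCH" + idHash);

    // Partition subpath in table-defined key order, as the loop above builds it.
    String outputLocation = FileUtils.makePartName(List.of("ds", "hr"), List.of("2023-10-01", "07"));

    System.out.println(new Path(parentPath, outputLocation));
    // hdfs://nn:8020/warehouse/db.db/t/_SCRATCH12345/ds=2023-10-01/hr=07
  }
}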