Beispiel #1
0
  @Override
  public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {

    try {
      Map<String, String> tableProperties = tableDesc.getJobProperties();

      String jobInfoProperty = tableProperties.get(HCatConstants.HCAT_KEY_JOB_INFO);
      if (jobInfoProperty != null) {

        InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobInfoProperty);

        HCatTableInfo tableInfo = inputJobInfo.getTableInfo();
        HCatSchema dataColumns = tableInfo.getDataColumns();
        List<HCatFieldSchema> dataFields = dataColumns.getFields();
        StringBuilder columnNamesSb = new StringBuilder();
        StringBuilder typeNamesSb = new StringBuilder();
        for (HCatFieldSchema dataField : dataFields) {
          if (columnNamesSb.length() > 0) {
            columnNamesSb.append(",");
            typeNamesSb.append(":");
          }
          columnNamesSb.append(dataField.getName());
          typeNamesSb.append(dataField.getTypeString());
        }
        jobProperties.put(IOConstants.SCHEMA_EVOLUTION_COLUMNS, columnNamesSb.toString());
        jobProperties.put(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, typeNamesSb.toString());

        boolean isAcidTable = AcidUtils.isTablePropertyTransactional(tableProperties);
        AcidUtils.setTransactionalTableScan(jobProperties, isAcidTable);
      }
    } catch (IOException e) {
      throw new IllegalStateException("Failed to set output path", e);
    }
  }
Beispiel #2
0
  @Override
  public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
    try {
      OutputJobInfo jobInfo =
          (OutputJobInfo)
              HCatUtil.deserialize(
                  tableDesc.getJobProperties().get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
      String parentPath = jobInfo.getTableInfo().getTableLocation();
      String dynHash = tableDesc.getJobProperties().get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID);
      String idHash = tableDesc.getJobProperties().get(HCatConstants.HCAT_OUTPUT_ID_HASH);

      // For dynamic partitioned writes without all keyvalues specified,
      // we create a temp dir for the associated write job
      if (dynHash != null) {
        // if external table and custom root specified, update the parent path
        if (Boolean.valueOf((String) tableDesc.getProperties().get("EXTERNAL"))
            && jobInfo.getCustomDynamicRoot() != null
            && jobInfo.getCustomDynamicRoot().length() > 0) {
          parentPath = new Path(parentPath, jobInfo.getCustomDynamicRoot()).toString();
        }
        parentPath =
            new Path(parentPath, FileOutputCommitterContainer.DYNTEMP_DIR_NAME + dynHash)
                .toString();
      } else {
        parentPath =
            new Path(parentPath, FileOutputCommitterContainer.SCRATCH_DIR_NAME + idHash).toString();
      }

      String outputLocation;

      if ((dynHash != null)
          && Boolean.valueOf((String) tableDesc.getProperties().get("EXTERNAL"))
          && jobInfo.getCustomDynamicPath() != null
          && jobInfo.getCustomDynamicPath().length() > 0) {
        // dynamic partitioning with custom path; resolve the custom path
        // using partition column values
        outputLocation = HCatFileUtil.resolveCustomPath(jobInfo, null, true);
      } else if ((dynHash == null)
          && Boolean.valueOf((String) tableDesc.getProperties().get("EXTERNAL"))
          && jobInfo.getLocation() != null
          && jobInfo.getLocation().length() > 0) {
        // honor custom location for external table apart from what metadata specifies
        outputLocation = jobInfo.getLocation();
      } else if (dynHash == null && jobInfo.getPartitionValues().size() == 0) {
        // Unpartitioned table, writing to the scratch dir directly is good enough.
        outputLocation = "";
      } else {
        List<String> cols = new ArrayList<String>();
        List<String> values = new ArrayList<String>();

        // Get the output location in the order partition keys are defined for the table.
        for (String name : jobInfo.getTableInfo().getPartitionColumns().getFieldNames()) {
          String value = jobInfo.getPartitionValues().get(name);
          cols.add(name);
          values.add(value);
        }
        outputLocation = FileUtils.makePartName(cols, values);
      }

      if (outputLocation != null && !outputLocation.isEmpty()) {
        jobInfo.setLocation(new Path(parentPath, outputLocation).toString());
      } else {
        jobInfo.setLocation(new Path(parentPath).toString());
      }

      // only set output dir if partition is fully materialized
      if (jobInfo.getPartitionValues().size()
          == jobInfo.getTableInfo().getPartitionColumns().size()) {
        jobProperties.put("mapred.output.dir", jobInfo.getLocation());
      }

      SpecialCases.addSpecialCasesParametersToOutputJobProperties(jobProperties, jobInfo, ofClass);

      jobProperties.put(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(jobInfo));
    } catch (IOException e) {
      throw new IllegalStateException("Failed to set output path", e);
    }
  }