Example no. 1
  @Override
  public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {

    try {
      Map<String, String> tableProperties = tableDesc.getJobProperties();

      String jobInfoProperty = tableProperties.get(HCatConstants.HCAT_KEY_JOB_INFO);
      if (jobInfoProperty != null) {

        InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobInfoProperty);

        HCatTableInfo tableInfo = inputJobInfo.getTableInfo();
        HCatSchema dataColumns = tableInfo.getDataColumns();
        List<HCatFieldSchema> dataFields = dataColumns.getFields();
        StringBuilder columnNamesSb = new StringBuilder();
        StringBuilder typeNamesSb = new StringBuilder();
        for (HCatFieldSchema dataField : dataFields) {
          if (columnNamesSb.length() > 0) {
            columnNamesSb.append(",");
            typeNamesSb.append(":");
          }
          columnNamesSb.append(dataField.getName());
          typeNamesSb.append(dataField.getTypeString());
        }
        jobProperties.put(IOConstants.SCHEMA_EVOLUTION_COLUMNS, columnNamesSb.toString());
        jobProperties.put(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, typeNamesSb.toString());

        boolean isAcidTable = AcidUtils.isTablePropertyTransactional(tableProperties);
        AcidUtils.setTransactionalTableScan(jobProperties, isAcidTable);
      }
    } catch (IOException e) {
      throw new IllegalStateException("Failed to configure input job properties", e);
    }
  }
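
As a hedged illustration of what the loop above produces (the column names and types here are hypothetical, not taken from the original source): for a table with columns (id int, name string), the two schema-evolution job properties end up comma-separated and colon-separated respectively:

  // Hypothetical schema: (id int, name string)
  jobProperties.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS);        // "id,name"
  jobProperties.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES);  // "int:string"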
  @BeforeClass
  public static void setUpOneTime() throws Exception {
    fs = new LocalFileSystem();
    fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration());

    HiveConf hiveConf = new HiveConf();
    hiveConf.setInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME, 0);
    // Hack: initialize the cache with a 0 expiry time, causing it to return a new Hive client
    // every time. Otherwise the cache doesn't play well with the second test method, since the
    // client gets closed in the tearDown() of the previous test.
    HCatUtil.getHiveMetastoreClient(hiveConf);

    MapCreate.writeCount = 0;
    MapRead.readCount = 0;
  }
  private static Properties getSerdeProperties(HCatTableInfo info, HCatSchema s)
      throws SerDeException {
    Properties props = new Properties();
    List<FieldSchema> fields = HCatUtil.getFieldSchemaList(s.getFields());
    props.setProperty(
        org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS,
        MetaStoreUtils.getColumnNamesFromFieldSchema(fields));
    props.setProperty(
        org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES,
        MetaStoreUtils.getColumnTypesFromFieldSchema(fields));

    // setting these props to match LazySimpleSerDe
    props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT, "\\N");
    props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1");

    // add props from params set in table schema
    props.putAll(info.getStorerInfo().getProperties());

    return props;
  }
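
A minimal usage sketch of the returned Properties (hypothetical; tableInfo and schema stand in for the HCatTableInfo and HCatSchema available to the caller), initializing the LazySimpleSerDe that the serialization properties above are tuned for:

  Properties props = getSerdeProperties(tableInfo, schema);
  LazySimpleSerDe serde = new LazySimpleSerDe();
  serde.initialize(new Configuration(), props);   // LIST_COLUMNS / LIST_COLUMN_TYPES drive the ObjectInspector
  ObjectInspector inspector = serde.getObjectInspector();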
Example no. 4
  @Override
  public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
    try {
      OutputJobInfo jobInfo =
          (OutputJobInfo)
              HCatUtil.deserialize(
                  tableDesc.getJobProperties().get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
      String parentPath = jobInfo.getTableInfo().getTableLocation();
      String dynHash = tableDesc.getJobProperties().get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID);
      String idHash = tableDesc.getJobProperties().get(HCatConstants.HCAT_OUTPUT_ID_HASH);

      // For dynamic-partitioned writes without all key values specified,
      // we create a temp dir for the associated write job
      if (dynHash != null) {
        // if external table and custom root specified, update the parent path
        if (Boolean.valueOf((String) tableDesc.getProperties().get("EXTERNAL"))
            && jobInfo.getCustomDynamicRoot() != null
            && jobInfo.getCustomDynamicRoot().length() > 0) {
          parentPath = new Path(parentPath, jobInfo.getCustomDynamicRoot()).toString();
        }
        parentPath =
            new Path(parentPath, FileOutputCommitterContainer.DYNTEMP_DIR_NAME + dynHash)
                .toString();
      } else {
        parentPath =
            new Path(parentPath, FileOutputCommitterContainer.SCRATCH_DIR_NAME + idHash).toString();
      }

      String outputLocation;

      if ((dynHash != null)
          && Boolean.valueOf((String) tableDesc.getProperties().get("EXTERNAL"))
          && jobInfo.getCustomDynamicPath() != null
          && jobInfo.getCustomDynamicPath().length() > 0) {
        // dynamic partitioning with custom path; resolve the custom path
        // using partition column values
        outputLocation = HCatFileUtil.resolveCustomPath(jobInfo, null, true);
      } else if ((dynHash == null)
          && Boolean.valueOf((String) tableDesc.getProperties().get("EXTERNAL"))
          && jobInfo.getLocation() != null
          && jobInfo.getLocation().length() > 0) {
        // honor custom location for external table apart from what metadata specifies
        outputLocation = jobInfo.getLocation();
      } else if (dynHash == null && jobInfo.getPartitionValues().size() == 0) {
        // Unpartitioned table, writing to the scratch dir directly is good enough.
        outputLocation = "";
      } else {
        List<String> cols = new ArrayList<String>();
        List<String> values = new ArrayList<String>();

        // Get the output location in the order partition keys are defined for the table.
        for (String name : jobInfo.getTableInfo().getPartitionColumns().getFieldNames()) {
          String value = jobInfo.getPartitionValues().get(name);
          cols.add(name);
          values.add(value);
        }
        outputLocation = FileUtils.makePartName(cols, values);
      }

      if (outputLocation != null && !outputLocation.isEmpty()) {
        jobInfo.setLocation(new Path(parentPath, outputLocation).toString());
      } else {
        jobInfo.setLocation(new Path(parentPath).toString());
      }

      // only set output dir if partition is fully materialized
      if (jobInfo.getPartitionValues().size()
          == jobInfo.getTableInfo().getPartitionColumns().size()) {
        jobProperties.put("mapred.output.dir", jobInfo.getLocation());
      }

      SpecialCases.addSpecialCasesParametersToOutputJobProperties(jobProperties, jobInfo, ofClass);

      jobProperties.put(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(jobInfo));
    } catch (IOException e) {
      throw new IllegalStateException("Failed to set output path", e);
    }
  }
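
For the partitioned branch above, FileUtils.makePartName builds the usual key=value path segments from the partition columns and values; a small hedged example with made-up partition columns:

  List<String> cols = Arrays.asList("ds", "region");        // hypothetical partition columns
  List<String> values = Arrays.asList("2016-01-01", "us");
  String outputLocation = FileUtils.makePartName(cols, values);
  // outputLocation -> "ds=2016-01-01/region=us", later resolved against parentPath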
  /**
   * Runs a local MapReduce job to load data from in-memory records into an HCatalog table.
   *
   * @param partitionValues map of partition key to value for the partition being written, or null
   * @param partitionColumns schema of the columns being written (passed to HCatOutputFormat.setSchema)
   * @param records data to be written to the HCatalog table
   * @param writeCount number of records expected to be written
   * @param assertWrite whether to assert that exactly writeCount records were written
   * @param asSingleMapTask whether to run the job as a single map task
   * @param customDynamicPathPattern custom dynamic-partition path pattern, or null for the default
   * @return the completed Job
   * @throws Exception if the job fails to run
   */
  Job runMRCreate(
      Map<String, String> partitionValues,
      List<HCatFieldSchema> partitionColumns,
      List<HCatRecord> records,
      int writeCount,
      boolean assertWrite,
      boolean asSingleMapTask,
      String customDynamicPathPattern)
      throws Exception {

    writeRecords = records;
    MapCreate.writeCount = 0;

    Configuration conf = new Configuration();
    Job job = new Job(conf, "hcat mapreduce write test");
    job.setJarByClass(this.getClass());
    job.setMapperClass(HCatMapReduceTest.MapCreate.class);

    // input/output settings
    job.setInputFormatClass(TextInputFormat.class);

    if (asSingleMapTask) {
      // One input path would mean only one map task
      Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
      createInputFile(path, writeCount);
      TextInputFormat.setInputPaths(job, path);
    } else {
      // Create two input paths so that two map tasks get triggered. There could be other ways
      // to trigger two map tasks.
      Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
      createInputFile(path, writeCount / 2);

      Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2");
      createInputFile(path2, (writeCount - writeCount / 2));

      TextInputFormat.setInputPaths(job, path, path2);
    }

    job.setOutputFormatClass(HCatOutputFormat.class);

    OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues);
    if (customDynamicPathPattern != null) {
      job.getConfiguration()
          .set(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN, customDynamicPathPattern);
    }
    HCatOutputFormat.setOutput(job, outputJobInfo);

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);

    job.setNumReduceTasks(0);

    HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns));

    boolean success = job.waitForCompletion(true);

    // Ensure counters are set when data has actually been read.
    if (partitionValues != null) {
      assertTrue(
          job.getCounters().getGroup("FileSystemCounters").findCounter("FILE_BYTES_READ").getValue()
              > 0);
    }

    if (!HCatUtil.isHadoop23()) {
      // Local mode outputcommitter hook is not invoked in Hadoop 1.x
      if (success) {
        new FileOutputCommitterContainer(job, null).commitJob(job);
      } else {
        new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED);
      }
    }
    if (assertWrite) {
      // Assert the write count only when this call is expected to verify it.
      Assert.assertEquals(writeCount, MapCreate.writeCount);
    }

    if (isTableExternal()) {
      externalTableLocation = outputJobInfo.getTableInfo().getTableLocation();
    }

    return job;
  }
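
A hedged example of how a subclass test might invoke runMRCreate (the partition key, record generator, and column list are hypothetical placeholders, not from the original test):

  Map<String, String> partitionValues = new HashMap<String, String>();
  partitionValues.put("part1", "p1value1");                       // hypothetical partition key/value
  List<HCatRecord> records = generateRecords(20);                  // assumed helper building 20 HCatRecords
  Job job = runMRCreate(partitionValues, partitionColumns, records,
      20, true /* assertWrite */, true /* asSingleMapTask */, null /* no custom dynamic path */);
  assertTrue(job.isSuccessful());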