@Override
public void configureInputJobProperties(TableDesc tableDesc,
                                         Map<String, String> jobProperties) {
  try {
    Map<String, String> tableProperties = tableDesc.getJobProperties();
    String jobInfoProperty = tableProperties.get(HCatConstants.HCAT_KEY_JOB_INFO);
    if (jobInfoProperty != null) {
      InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobInfoProperty);
      HCatTableInfo tableInfo = inputJobInfo.getTableInfo();
      HCatSchema dataColumns = tableInfo.getDataColumns();
      List<HCatFieldSchema> dataFields = dataColumns.getFields();
      StringBuilder columnNamesSb = new StringBuilder();
      StringBuilder typeNamesSb = new StringBuilder();

      // Build comma-separated column names and colon-separated type names,
      // matching the "columns" / "columns.types" serde conventions.
      for (HCatFieldSchema dataField : dataFields) {
        if (columnNamesSb.length() > 0) {
          columnNamesSb.append(",");
          typeNamesSb.append(":");
        }
        columnNamesSb.append(dataField.getName());
        typeNamesSb.append(dataField.getTypeString());
      }
      jobProperties.put(IOConstants.SCHEMA_EVOLUTION_COLUMNS, columnNamesSb.toString());
      jobProperties.put(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, typeNamesSb.toString());

      boolean isAcidTable = AcidUtils.isTablePropertyTransactional(tableProperties);
      AcidUtils.setTransactionalTableScan(jobProperties, isAcidTable);
    }
  } catch (IOException e) {
    throw new IllegalStateException("Failed to configure input job properties", e);
  }
}
@BeforeClass
public static void setUpOneTime() throws Exception {
  fs = new LocalFileSystem();
  fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration());

  HiveConf hiveConf = new HiveConf();
  hiveConf.setInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME, 0);
  // Hack: initialize the cache with a 0 expiry time so that it returns a new hive client
  // every time. Otherwise the cache doesn't play well with the second test method, because
  // the client gets closed() in the tearDown() of the previous test.
  HCatUtil.getHiveMetastoreClient(hiveConf);

  MapCreate.writeCount = 0;
  MapRead.readCount = 0;
}
private static Properties getSerdeProperties(HCatTableInfo info, HCatSchema s)
    throws SerDeException {
  Properties props = new Properties();
  List<FieldSchema> fields = HCatUtil.getFieldSchemaList(s.getFields());
  props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS,
      MetaStoreUtils.getColumnNamesFromFieldSchema(fields));
  props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES,
      MetaStoreUtils.getColumnTypesFromFieldSchema(fields));

  // setting these props to match LazySimpleSerDe
  props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT, "\\N");
  props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1");

  // add props from params set in table schema
  props.putAll(info.getStorerInfo().getProperties());

  return props;
}
@Override
public void configureOutputJobProperties(TableDesc tableDesc,
                                          Map<String, String> jobProperties) {
  try {
    OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(
        tableDesc.getJobProperties().get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
    String parentPath = jobInfo.getTableInfo().getTableLocation();
    String dynHash = tableDesc.getJobProperties().get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID);
    String idHash = tableDesc.getJobProperties().get(HCatConstants.HCAT_OUTPUT_ID_HASH);

    // For dynamic partitioned writes without all key values specified,
    // we create a temp dir for the associated write job
    if (dynHash != null) {
      // if external table and custom root specified, update the parent path
      if (Boolean.valueOf((String) tableDesc.getProperties().get("EXTERNAL"))
          && jobInfo.getCustomDynamicRoot() != null
          && jobInfo.getCustomDynamicRoot().length() > 0) {
        parentPath = new Path(parentPath, jobInfo.getCustomDynamicRoot()).toString();
      }
      parentPath = new Path(parentPath,
          FileOutputCommitterContainer.DYNTEMP_DIR_NAME + dynHash).toString();
    } else {
      parentPath = new Path(parentPath,
          FileOutputCommitterContainer.SCRATCH_DIR_NAME + idHash).toString();
    }

    String outputLocation;

    if ((dynHash != null)
        && Boolean.valueOf((String) tableDesc.getProperties().get("EXTERNAL"))
        && jobInfo.getCustomDynamicPath() != null
        && jobInfo.getCustomDynamicPath().length() > 0) {
      // dynamic partitioning with custom path; resolve the custom path
      // using partition column values
      outputLocation = HCatFileUtil.resolveCustomPath(jobInfo, null, true);
    } else if ((dynHash == null)
        && Boolean.valueOf((String) tableDesc.getProperties().get("EXTERNAL"))
        && jobInfo.getLocation() != null
        && jobInfo.getLocation().length() > 0) {
      // honor custom location for external table apart from what metadata specifies
      outputLocation = jobInfo.getLocation();
    } else if (dynHash == null && jobInfo.getPartitionValues().size() == 0) {
      // Unpartitioned table, writing to the scratch dir directly is good enough.
      outputLocation = "";
    } else {
      List<String> cols = new ArrayList<String>();
      List<String> values = new ArrayList<String>();

      // Get the output location in the order partition keys are defined for the table.
      for (String name : jobInfo.getTableInfo().getPartitionColumns().getFieldNames()) {
        String value = jobInfo.getPartitionValues().get(name);
        cols.add(name);
        values.add(value);
      }
      outputLocation = FileUtils.makePartName(cols, values);
    }

    if (outputLocation != null && !outputLocation.isEmpty()) {
      jobInfo.setLocation(new Path(parentPath, outputLocation).toString());
    } else {
      jobInfo.setLocation(new Path(parentPath).toString());
    }

    // only set output dir if partition is fully materialized
    if (jobInfo.getPartitionValues().size()
        == jobInfo.getTableInfo().getPartitionColumns().size()) {
      jobProperties.put("mapred.output.dir", jobInfo.getLocation());
    }

    SpecialCases.addSpecialCasesParametersToOutputJobProperties(jobProperties, jobInfo, ofClass);

    jobProperties.put(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(jobInfo));
  } catch (IOException e) {
    throw new IllegalStateException("Failed to set output path", e);
  }
}
/**
 * Run a local map reduce job to load data from in-memory records into an HCatalog table.
 *
 * @param partitionValues partition key/value map for the write, or null
 * @param partitionColumns schema of the columns to write
 * @param records data to be written to the HCatalog table
 * @param writeCount number of records expected to be written
 * @param assertWrite whether to assert that all records were written
 * @param asSingleMapTask whether to run the write as a single map task
 * @param customDynamicPathPattern custom dynamic partition path pattern, or null
 * @return the completed Job
 * @throws Exception
 */
Job runMRCreate(Map<String, String> partitionValues, List<HCatFieldSchema> partitionColumns,
    List<HCatRecord> records, int writeCount, boolean assertWrite, boolean asSingleMapTask,
    String customDynamicPathPattern) throws Exception {

  writeRecords = records;
  MapCreate.writeCount = 0;

  Configuration conf = new Configuration();
  Job job = new Job(conf, "hcat mapreduce write test");
  job.setJarByClass(this.getClass());
  job.setMapperClass(HCatMapReduceTest.MapCreate.class);

  // input/output settings
  job.setInputFormatClass(TextInputFormat.class);

  if (asSingleMapTask) {
    // One input path would mean only one map task
    Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
    createInputFile(path, writeCount);
    TextInputFormat.setInputPaths(job, path);
  } else {
    // Create two input paths so that two map tasks get triggered. There could be other ways
    // to trigger two map tasks.
    Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
    createInputFile(path, writeCount / 2);

    Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2");
    createInputFile(path2, (writeCount - writeCount / 2));

    TextInputFormat.setInputPaths(job, path, path2);
  }

  job.setOutputFormatClass(HCatOutputFormat.class);

  OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues);
  if (customDynamicPathPattern != null) {
    job.getConfiguration().set(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN,
        customDynamicPathPattern);
  }
  HCatOutputFormat.setOutput(job, outputJobInfo);

  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(DefaultHCatRecord.class);

  job.setNumReduceTasks(0);

  HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns));

  boolean success = job.waitForCompletion(true);

  // Ensure counters are set when data has actually been read.
  if (partitionValues != null) {
    assertTrue(job.getCounters().getGroup("FileSystemCounters")
        .findCounter("FILE_BYTES_READ").getValue() > 0);
  }

  if (!HCatUtil.isHadoop23()) {
    // Local mode outputcommitter hook is not invoked in Hadoop 1.x
    if (success) {
      new FileOutputCommitterContainer(job, null).commitJob(job);
    } else {
      new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED);
    }
  }

  if (assertWrite) {
    // we assert only if we expected to assert with this call.
    Assert.assertEquals(writeCount, MapCreate.writeCount);
  }

  if (isTableExternal()) {
    externalTableLocation = outputJobInfo.getTableInfo().getTableLocation();
  }

  return job;
}