/**
 * Emits one record per input line, choosing the record layout from the running counter
 * {@code i}: two columns for the first call, one column for the second, and three columns
 * (the last being the literal {@code "extra"}) for the third. The record is written to the
 * output registered under {@code tableNames[i]}, after which {@code i} is advanced.
 *
 * @param key input key (not used)
 * @param value input line; it is split on {@code ","} to obtain the column values
 * @param context task context handed to {@code MultiOutputFormat.write}
 * @throws IOException declared by the overridden method and the calls made here
 * @throws InterruptedException declared by the overridden method and the calls made here
 */
@Override
protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  final String[] fields = value.toString().split(",");
  HCatRecord outRecord = null;

  if (i == 0) {
    // First call: two-column record.
    outRecord = new DefaultHCatRecord(2);
    outRecord.set(0, fields[0]);
    outRecord.set(1, fields[1]);
  } else if (i == 1) {
    // Second call: single-column record.
    outRecord = new DefaultHCatRecord(1);
    outRecord.set(0, fields[0]);
  } else if (i == 2) {
    // Third call: three-column record with a fixed trailing value.
    outRecord = new DefaultHCatRecord(3);
    outRecord.set(0, fields[0]);
    outRecord.set(1, fields[1]);
    outRecord.set(2, "extra");
  } else {
    // Any other counter value is treated as a test failure.
    Assert.fail("This should not happen!!!!!");
  }

  MultiOutputFormat.write(tableNames[i], null, outRecord, context);
  i++;
}
/** * Simple test case. * * <ol> * <li>Submits a mapred job which writes out one fixed line to each of the tables * <li>uses hive fetch task to read the data and see if it matches what was written * </ol> * * @throws Exception if any error occurs */ @Test public void testOutputFormat() throws Throwable { HashMap<String, String> partitionValues = new HashMap<String, String>(); partitionValues.put("ds", "1"); partitionValues.put("cluster", "ag"); ArrayList<OutputJobInfo> infoList = new ArrayList<OutputJobInfo>(); infoList.add(OutputJobInfo.create("default", tableNames[0], partitionValues)); infoList.add(OutputJobInfo.create("default", tableNames[1], partitionValues)); infoList.add(OutputJobInfo.create("default", tableNames[2], partitionValues)); Job job = new Job(hiveConf, "SampleJob"); job.setMapperClass(MyMapper.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(MultiOutputFormat.class); job.setNumReduceTasks(0); JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); for (int i = 0; i < tableNames.length; i++) { configurer.addOutputFormat( tableNames[i], HCatOutputFormat.class, BytesWritable.class, HCatRecord.class); HCatOutputFormat.setOutput(configurer.getJob(tableNames[i]), infoList.get(i)); HCatOutputFormat.setSchema(configurer.getJob(tableNames[i]), schemaMap.get(tableNames[i])); } configurer.configure(); Path filePath = createInputFile(); FileInputFormat.addInputPath(job, filePath); Assert.assertTrue(job.waitForCompletion(true)); ArrayList<String> outputs = new ArrayList<String>(); for (String tbl : tableNames) { outputs.add(getTableData(tbl, "default").get(0)); } Assert.assertEquals( "Comparing output of table " + tableNames[0] + " is not correct", outputs.get(0), "a,a,1,ag"); Assert.assertEquals( "Comparing output of table " + tableNames[1] + " is not correct", outputs.get(1), "a,1,ag"); Assert.assertEquals( "Comparing output of table " + tableNames[2] + " is not correct", outputs.get(2), "a,a,extra,1,ag"); // 
Check permisssion on partition dirs and files created for (int i = 0; i < tableNames.length; i++) { Path partitionFile = new Path(warehousedir + "/" + tableNames[i] + "/ds=1/cluster=ag/part-m-00000"); FileSystem fs = partitionFile.getFileSystem(mrConf); Assert.assertEquals( "File permissions of table " + tableNames[i] + " is not correct", fs.getFileStatus(partitionFile).getPermission(), new FsPermission(tablePerms[i])); Assert.assertEquals( "File permissions of table " + tableNames[i] + " is not correct", fs.getFileStatus(partitionFile.getParent()).getPermission(), new FsPermission(tablePerms[i])); Assert.assertEquals( "File permissions of table " + tableNames[i] + " is not correct", fs.getFileStatus(partitionFile.getParent().getParent()).getPermission(), new FsPermission(tablePerms[i])); } LOG.info("File permissions verified"); }