/**
 * Builds a schema that contains only the partition keys of a table.
 *
 * @param table the table whose partition keys are read
 * @return a new HCatSchema with one field per partition key, in the order
 *         returned by {@code table.getPartitionKeys()}; empty when there are none
 * @throws IOException propagated from converting a partition key to an HCat field
 */
public static HCatSchema getPartitionColumns(Table table) throws IOException {
  HCatSchema partitionSchema = new HCatSchema(new LinkedList<HCatFieldSchema>());
  // Iterating an empty key list appends nothing, so no explicit size check is needed.
  for (FieldSchema partitionKey : table.getPartitionKeys()) {
    HCatFieldSchema converted = HCatSchemaUtils.getHCatFieldSchema(partitionKey);
    partitionSchema.append(converted);
  }
  return partitionSchema;
}
/**
 * Converts every given {@code FieldSchema} to an {@code HCatFieldSchema}.
 *
 * @param fields the fields to convert
 * @return a new list holding the converted fields in the same order as the input
 * @throws HCatException propagated from {@code HCatSchemaUtils.getHCatFieldSchema}
 */
public static List<HCatFieldSchema> getHCatFieldSchemaList(FieldSchema... fields)
    throws HCatException {
  final int count = fields.length;
  List<HCatFieldSchema> converted = new ArrayList<HCatFieldSchema>(count);
  for (int idx = 0; idx < count; idx++) {
    converted.add(HCatSchemaUtils.getHCatFieldSchema(fields[idx]));
  }
  return converted;
}
/**
 * Converts a list of {@code FieldSchema} to a list of {@code HCatFieldSchema}.
 *
 * @param fields the fields to convert; may be {@code null}
 * @return {@code null} when {@code fields} is {@code null}, otherwise a new list
 *         holding the converted fields in iteration order
 * @throws HCatException propagated from {@code HCatSchemaUtils.getHCatFieldSchema}
 */
public static List<HCatFieldSchema> getHCatFieldSchemaList(List<FieldSchema> fields)
    throws HCatException {
  // Callers get null back for a null input rather than an empty list.
  if (fields == null) {
    return null;
  }

  List<HCatFieldSchema> converted = new ArrayList<HCatFieldSchema>();
  for (FieldSchema field : fields) {
    HCatFieldSchema hcatField = HCatSchemaUtils.getHCatFieldSchema(field);
    converted.add(hcatField);
  }
  return converted;
}
static { try { FieldSchema keyCol = new FieldSchema("key", serdeConstants.STRING_TYPE_NAME, ""); test1Cols.add(keyCol); test2Cols.add(keyCol); test3Cols.add(keyCol); hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); hCattest2Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); FieldSchema valueCol = new FieldSchema("value", serdeConstants.STRING_TYPE_NAME, ""); test1Cols.add(valueCol); test3Cols.add(valueCol); hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueCol)); hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueCol)); FieldSchema extraCol = new FieldSchema("extra", serdeConstants.STRING_TYPE_NAME, ""); test3Cols.add(extraCol); hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(extraCol)); colMapping.put("test1", test1Cols); colMapping.put("test2", test2Cols); colMapping.put("test3", test3Cols); } catch (HCatException e) { LOG.error("Error in setting up schema fields for the table", e); throw new RuntimeException(e); } }
/**
 * Initializes the shared fixtures once for the class: the table name, 20 records of
 * the form (index, "strvalue" + index), and a two-column schema (c1, c2).
 */
@BeforeClass
public static void oneTimeSetUp() throws Exception {
  tableName = "testHCatPartitionedTable";

  writeRecords = new ArrayList<HCatRecord>();
  for (int row = 0; row < 20; row++) {
    List<Object> values = new ArrayList<Object>();
    values.add(row);
    values.add("strvalue" + row);
    HCatRecord record = new DefaultHCatRecord(values);
    writeRecords.add(record);
  }

  partitionColumns = new ArrayList<HCatFieldSchema>();
  FieldSchema c1 = new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "");
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(c1));
  FieldSchema c2 = new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "");
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(c2));
}
public static HCatSchema getTableSchemaWithPtnCols(Table table) throws IOException { HCatSchema tableSchema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols())); if (table.getPartitionKeys().size() != 0) { // add partition keys to table schema // NOTE : this assumes that we do not ever have ptn keys as columns // inside the table schema as well! for (FieldSchema fs : table.getPartitionKeys()) { tableSchema.append(HCatSchemaUtils.getHCatFieldSchema(fs)); } } return tableSchema; }
// check behavior while change the order of columns private void columnOrderChangeTest() throws Exception { HCatSchema tableSchema = getTableSchema(); assertEquals(5, tableSchema.getFields().size()); partitionColumns = new ArrayList<HCatFieldSchema>(); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); writeRecords = new ArrayList<HCatRecord>(); for (int i = 0; i < 10; i++) { List<Object> objList = new ArrayList<Object>(); objList.add(i); objList.add("co strvalue" + i); objList.add("co str2value" + i); writeRecords.add(new DefaultHCatRecord(objList)); } Map<String, String> partitionMap = new HashMap<String, String>(); partitionMap.put("part1", "p1value8"); partitionMap.put("part0", "p0value8"); Exception exc = null; try { runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); } catch (IOException e) { exc = e; } assertTrue(exc != null); assertTrue(exc instanceof HCatException); assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType()); partitionColumns = new ArrayList<HCatFieldSchema>(); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); writeRecords = new ArrayList<HCatRecord>(); for (int i = 0; i < 10; i++) { List<Object> objList = new ArrayList<Object>(); objList.add(i); objList.add("co strvalue" + i); writeRecords.add(new DefaultHCatRecord(objList)); } runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); // Read should get 10 + 20 + 10 + 10 + 20 rows runMRRead(70); }
// test that new columns gets added to table schema private void tableSchemaTest() throws Exception { HCatSchema tableSchema = getTableSchema(); assertEquals(4, tableSchema.getFields().size()); // Update partition schema to have 3 fields partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); writeRecords = new ArrayList<HCatRecord>(); for (int i = 0; i < 20; i++) { List<Object> objList = new ArrayList<Object>(); objList.add(i); objList.add("strvalue" + i); objList.add("str2value" + i); writeRecords.add(new DefaultHCatRecord(objList)); } Map<String, String> partitionMap = new HashMap<String, String>(); partitionMap.put("part1", "p1value5"); partitionMap.put("part0", "p0value5"); runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); tableSchema = getTableSchema(); // assert that c3 has got added to table schema assertEquals(5, tableSchema.getFields().size()); assertEquals("c1", tableSchema.getFields().get(0).getName()); assertEquals("c2", tableSchema.getFields().get(1).getName()); assertEquals("c3", tableSchema.getFields().get(2).getName()); assertEquals("part1", tableSchema.getFields().get(3).getName()); assertEquals("part0", tableSchema.getFields().get(4).getName()); // Test that changing column data type fails partitionMap.clear(); partitionMap.put("part1", "p1value6"); partitionMap.put("part0", "p0value6"); partitionColumns = new ArrayList<HCatFieldSchema>(); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c2", serdeConstants.INT_TYPE_NAME, ""))); IOException exc = null; try { runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); } catch (IOException e) { exc = e; } assertTrue(exc != null); assertTrue(exc instanceof HCatException); assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType()); // Test 
that partition key is not allowed in data partitionColumns = new ArrayList<HCatFieldSchema>(); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("part1", serdeConstants.STRING_TYPE_NAME, ""))); List<HCatRecord> recordsContainingPartitionCols = new ArrayList<HCatRecord>(20); for (int i = 0; i < 20; i++) { List<Object> objList = new ArrayList<Object>(); objList.add(i); objList.add("c2value" + i); objList.add("c3value" + i); objList.add("p1value6"); recordsContainingPartitionCols.add(new DefaultHCatRecord(objList)); } exc = null; try { runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true); } catch (IOException e) { exc = e; } List<HCatRecord> records = runMRRead(20, "part1 = \"p1value6\""); assertEquals(20, records.size()); records = runMRRead(20, "part0 = \"p0value6\""); assertEquals(20, records.size()); Integer i = 0; for (HCatRecord rec : records) { assertEquals(5, rec.size()); assertTrue(rec.get(0).equals(i)); assertTrue(rec.get(1).equals("c2value" + i)); assertTrue(rec.get(2).equals("c3value" + i)); assertTrue(rec.get(3).equals("p1value6")); assertTrue(rec.get(4).equals("p0value6")); i++; } }