/**
 * Collects the partition keys of a table into an {@link HCatSchema}.
 *
 * @param table the table whose partition keys are read
 * @return a schema holding one converted field per partition key, in the order the table
 *     returns them; the schema has no fields when the table has no partition keys
 * @throws IOException propagated from converting or appending a partition key
 */
public static HCatSchema getPartitionColumns(Table table) throws IOException {
  HCatSchema partitionSchema = new HCatSchema(new LinkedList<HCatFieldSchema>());
  // Iterating an empty key list is a no-op, so no explicit size check is required.
  for (FieldSchema partitionKey : table.getPartitionKeys()) {
    partitionSchema.append(HCatSchemaUtils.getHCatFieldSchema(partitionKey));
  }
  return partitionSchema;
}
/**
 * Converts each given {@code FieldSchema} into an {@code HCatFieldSchema}.
 *
 * @param fields the fields to convert, in order
 * @return a new list with one converted entry per input field, in the same order
 * @throws HCatException propagated from {@code HCatSchemaUtils.getHCatFieldSchema}
 */
public static List<HCatFieldSchema> getHCatFieldSchemaList(FieldSchema... fields)
    throws HCatException {
  final int count = fields.length;
  List<HCatFieldSchema> converted = new ArrayList<HCatFieldSchema>(count);
  for (int idx = 0; idx < count; idx++) {
    converted.add(HCatSchemaUtils.getHCatFieldSchema(fields[idx]));
  }
  return converted;
}
/**
 * Converts a list of {@code HCatFieldSchema} into a list of {@code FieldSchema}.
 *
 * @param hcatFields the fields to convert; may be {@code null}
 * @return {@code null} when {@code hcatFields} is {@code null}; otherwise a new list with one
 *     converted entry per input field, in the same order
 */
public static List<FieldSchema> getFieldSchemaList(List<HCatFieldSchema> hcatFields) {
  // A null input is passed straight through as a null result.
  if (hcatFields == null) {
    return null;
  }
  List<FieldSchema> converted = new ArrayList<FieldSchema>();
  for (HCatFieldSchema hcatField : hcatFields) {
    converted.add(HCatSchemaUtils.getFieldSchema(hcatField));
  }
  return converted;
}
/**
 * Converts a list of {@code FieldSchema} into a list of {@code HCatFieldSchema}.
 *
 * @param fields the fields to convert; may be {@code null}
 * @return {@code null} when {@code fields} is {@code null}; otherwise a new list with one
 *     converted entry per input field, in the same order
 * @throws HCatException propagated from {@code HCatSchemaUtils.getHCatFieldSchema}
 */
public static List<HCatFieldSchema> getHCatFieldSchemaList(List<FieldSchema> fields)
    throws HCatException {
  // A null input is passed straight through as a null result.
  if (fields == null) {
    return null;
  }
  List<HCatFieldSchema> converted = new ArrayList<HCatFieldSchema>();
  for (FieldSchema field : fields) {
    converted.add(HCatSchemaUtils.getHCatFieldSchema(field));
  }
  return converted;
}
static { try { FieldSchema keyCol = new FieldSchema("key", serdeConstants.STRING_TYPE_NAME, ""); test1Cols.add(keyCol); test2Cols.add(keyCol); test3Cols.add(keyCol); hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); hCattest2Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); FieldSchema valueCol = new FieldSchema("value", serdeConstants.STRING_TYPE_NAME, ""); test1Cols.add(valueCol); test3Cols.add(valueCol); hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueCol)); hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueCol)); FieldSchema extraCol = new FieldSchema("extra", serdeConstants.STRING_TYPE_NAME, ""); test3Cols.add(extraCol); hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(extraCol)); colMapping.put("test1", test1Cols); colMapping.put("test2", test2Cols); colMapping.put("test3", test3Cols); } catch (HCatException e) { LOG.error("Error in setting up schema fields for the table", e); throw new RuntimeException(e); } }
/**
 * One-time fixture setup: sets the table name, builds 20 two-column records of the form
 * {@code (i, "strvalue" + i)} and declares the data columns c1 (int) and c2 (string).
 */
@BeforeClass
public static void oneTimeSetUp() throws Exception {
  tableName = "testHCatPartitionedTable";

  writeRecords = new ArrayList<HCatRecord>();
  for (int row = 0; row < 20; row++) {
    List<Object> values = new ArrayList<Object>();
    values.add(row);
    values.add("strvalue" + row);
    writeRecords.add(new DefaultHCatRecord(values));
  }

  partitionColumns = new ArrayList<HCatFieldSchema>();
  FieldSchema c1 = new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "");
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(c1));
  FieldSchema c2 = new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "");
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(c2));
}
public static HCatSchema getTableSchemaWithPtnCols(Table table) throws IOException { HCatSchema tableSchema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols())); if (table.getPartitionKeys().size() != 0) { // add partition keys to table schema // NOTE : this assumes that we do not ever have ptn keys as columns // inside the table schema as well! for (FieldSchema fs : table.getPartitionKeys()) { tableSchema.append(HCatSchemaUtils.getHCatFieldSchema(fs)); } } return tableSchema; }
// check behavior while change the order of columns private void columnOrderChangeTest() throws Exception { HCatSchema tableSchema = getTableSchema(); assertEquals(5, tableSchema.getFields().size()); partitionColumns = new ArrayList<HCatFieldSchema>(); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); writeRecords = new ArrayList<HCatRecord>(); for (int i = 0; i < 10; i++) { List<Object> objList = new ArrayList<Object>(); objList.add(i); objList.add("co strvalue" + i); objList.add("co str2value" + i); writeRecords.add(new DefaultHCatRecord(objList)); } Map<String, String> partitionMap = new HashMap<String, String>(); partitionMap.put("part1", "p1value8"); partitionMap.put("part0", "p0value8"); Exception exc = null; try { runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); } catch (IOException e) { exc = e; } assertTrue(exc != null); assertTrue(exc instanceof HCatException); assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType()); partitionColumns = new ArrayList<HCatFieldSchema>(); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); writeRecords = new ArrayList<HCatRecord>(); for (int i = 0; i < 10; i++) { List<Object> objList = new ArrayList<Object>(); objList.add(i); objList.add("co strvalue" + i); writeRecords.add(new DefaultHCatRecord(objList)); } runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); // Read should get 10 + 20 + 10 + 10 + 20 rows runMRRead(70); }
// test that new columns gets added to table schema private void tableSchemaTest() throws Exception { HCatSchema tableSchema = getTableSchema(); assertEquals(4, tableSchema.getFields().size()); // Update partition schema to have 3 fields partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); writeRecords = new ArrayList<HCatRecord>(); for (int i = 0; i < 20; i++) { List<Object> objList = new ArrayList<Object>(); objList.add(i); objList.add("strvalue" + i); objList.add("str2value" + i); writeRecords.add(new DefaultHCatRecord(objList)); } Map<String, String> partitionMap = new HashMap<String, String>(); partitionMap.put("part1", "p1value5"); partitionMap.put("part0", "p0value5"); runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); tableSchema = getTableSchema(); // assert that c3 has got added to table schema assertEquals(5, tableSchema.getFields().size()); assertEquals("c1", tableSchema.getFields().get(0).getName()); assertEquals("c2", tableSchema.getFields().get(1).getName()); assertEquals("c3", tableSchema.getFields().get(2).getName()); assertEquals("part1", tableSchema.getFields().get(3).getName()); assertEquals("part0", tableSchema.getFields().get(4).getName()); // Test that changing column data type fails partitionMap.clear(); partitionMap.put("part1", "p1value6"); partitionMap.put("part0", "p0value6"); partitionColumns = new ArrayList<HCatFieldSchema>(); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c2", serdeConstants.INT_TYPE_NAME, ""))); IOException exc = null; try { runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); } catch (IOException e) { exc = e; } assertTrue(exc != null); assertTrue(exc instanceof HCatException); assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType()); // Test 
that partition key is not allowed in data partitionColumns = new ArrayList<HCatFieldSchema>(); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); partitionColumns.add( HCatSchemaUtils.getHCatFieldSchema( new FieldSchema("part1", serdeConstants.STRING_TYPE_NAME, ""))); List<HCatRecord> recordsContainingPartitionCols = new ArrayList<HCatRecord>(20); for (int i = 0; i < 20; i++) { List<Object> objList = new ArrayList<Object>(); objList.add(i); objList.add("c2value" + i); objList.add("c3value" + i); objList.add("p1value6"); recordsContainingPartitionCols.add(new DefaultHCatRecord(objList)); } exc = null; try { runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true); } catch (IOException e) { exc = e; } List<HCatRecord> records = runMRRead(20, "part1 = \"p1value6\""); assertEquals(20, records.size()); records = runMRRead(20, "part0 = \"p0value6\""); assertEquals(20, records.size()); Integer i = 0; for (HCatRecord rec : records) { assertEquals(5, rec.size()); assertTrue(rec.get(0).equals(i)); assertTrue(rec.get(1).equals("c2value" + i)); assertTrue(rec.get(2).equals("c3value" + i)); assertTrue(rec.get(3).equals("p1value6")); assertTrue(rec.get(4).equals("p0value6")); i++; } }
/** * Validate partition schema, checks if the column types match between the partition and the * existing table schema. Returns the list of columns present in the partition but not in the * table. * * @param table the table * @param partitionSchema the partition schema * @return the list of newly added fields * @throws IOException Signals that an I/O exception has occurred. */ public static List<FieldSchema> validatePartitionSchema(Table table, HCatSchema partitionSchema) throws IOException { Map<String, FieldSchema> partitionKeyMap = new HashMap<String, FieldSchema>(); for (FieldSchema field : table.getPartitionKeys()) { partitionKeyMap.put(field.getName().toLowerCase(), field); } List<FieldSchema> tableCols = table.getCols(); List<FieldSchema> newFields = new ArrayList<FieldSchema>(); for (int i = 0; i < partitionSchema.getFields().size(); i++) { FieldSchema field = HCatSchemaUtils.getFieldSchema(partitionSchema.getFields().get(i)); FieldSchema tableField; if (i < tableCols.size()) { tableField = tableCols.get(i); if (!tableField.getName().equalsIgnoreCase(field.getName())) { throw new HCatException( ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, "Expected column <" + tableField.getName() + "> at position " + (i + 1) + ", found column <" + field.getName() + ">"); } } else { tableField = partitionKeyMap.get(field.getName().toLowerCase()); if (tableField != null) { throw new HCatException( ErrorType.ERROR_SCHEMA_PARTITION_KEY, "Key <" + field.getName() + ">"); } } if (tableField == null) { // field present in partition but not in table newFields.add(field); } else { // field present in both. 
validate type has not changed TypeInfo partitionType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType()); TypeInfo tableType = TypeInfoUtils.getTypeInfoFromTypeString(tableField.getType()); if (!partitionType.equals(tableType)) { throw new HCatException( ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, "Column <" + field.getName() + ">, expected <" + tableType.getTypeName() + ">, got <" + partitionType.getTypeName() + ">"); } } } return newFields; }