Example #1
0
 /**
  * Builds an {@link HCatSchema} that contains only the partition keys of the given table.
  *
  * @param table the table whose partition keys are converted
  * @return a schema holding one field per partition key, in the iteration order of
  *     {@code table.getPartitionKeys()}; empty when the table has no partition keys
  * @throws IOException if converting a partition key or appending it to the schema fails
  */
 public static HCatSchema getPartitionColumns(Table table) throws IOException {
   HCatSchema partitionSchema = new HCatSchema(new LinkedList<HCatFieldSchema>());
   // Nothing to convert for an unpartitioned table.
   if (table.getPartitionKeys().isEmpty()) {
     return partitionSchema;
   }
   for (FieldSchema partitionKey : table.getPartitionKeys()) {
     partitionSchema.append(HCatSchemaUtils.getHCatFieldSchema(partitionKey));
   }
   return partitionSchema;
 }
Example #2
0
  /**
   * Converts each given {@link FieldSchema} into an {@link HCatFieldSchema}.
   *
   * @param fields the field schemas to convert, in order
   * @return a new list with one converted entry per input field, in the same order
   * @throws HCatException if any field cannot be converted
   */
  public static List<HCatFieldSchema> getHCatFieldSchemaList(FieldSchema... fields)
      throws HCatException {
    final int count = fields.length;
    List<HCatFieldSchema> converted = new ArrayList<HCatFieldSchema>(count);

    for (int idx = 0; idx < count; idx++) {
      converted.add(HCatSchemaUtils.getHCatFieldSchema(fields[idx]));
    }

    return converted;
  }
Example #3
0
 /**
  * Converts a list of {@link FieldSchema} into a list of {@link HCatFieldSchema}.
  *
  * @param fields the field schemas to convert; may be {@code null}
  * @return {@code null} when {@code fields} is {@code null}, otherwise a new list with one
  *     converted entry per input field, in iteration order
  * @throws HCatException if any field cannot be converted
  */
 public static List<HCatFieldSchema> getHCatFieldSchemaList(List<FieldSchema> fields)
     throws HCatException {
   // A null input is passed straight through as a null result.
   if (fields == null) {
     return null;
   }
   List<HCatFieldSchema> converted = new ArrayList<HCatFieldSchema>();
   for (FieldSchema field : fields) {
     converted.add(HCatSchemaUtils.getHCatFieldSchema(field));
   }
   return converted;
 }
 // Fills the raw (testNCols) and HCat (hCattestNCols) column lists for the three test tables
 // and registers the raw lists in colMapping under "test1".."test3":
 //   test1 -> key, value
 //   test2 -> key
 //   test3 -> key, value, extra
 static {
   try {
     FieldSchema keyField = new FieldSchema("key", serdeConstants.STRING_TYPE_NAME, "");
     FieldSchema valueField = new FieldSchema("value", serdeConstants.STRING_TYPE_NAME, "");
     FieldSchema extraField = new FieldSchema("extra", serdeConstants.STRING_TYPE_NAME, "");

     // "key" is present in every table. Each HCat list gets its own converted instance.
     test1Cols.add(keyField);
     test2Cols.add(keyField);
     test3Cols.add(keyField);
     hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyField));
     hCattest2Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyField));
     hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyField));

     // "value" is present in test1 and test3 only.
     test1Cols.add(valueField);
     test3Cols.add(valueField);
     hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueField));
     hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueField));

     // "extra" is present in test3 only.
     test3Cols.add(extraField);
     hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(extraField));

     colMapping.put("test1", test1Cols);
     colMapping.put("test2", test2Cols);
     colMapping.put("test3", test3Cols);
   } catch (HCatException schemaError) {
     // Log, then abort class initialization with the original cause attached.
     LOG.error("Error in setting up schema fields for the table", schemaError);
     throw new RuntimeException(schemaError);
   }
 }
Example #5
0
  /**
   * One-time fixture setup: sets the table name, prepares 20 two-field records
   * ({@code i}, {@code "strvalue" + i}) and the matching partition column schema
   * (c1 int, c2 string).
   *
   * @throws Exception if a column schema cannot be converted
   */
  @BeforeClass
  public static void oneTimeSetUp() throws Exception {
    tableName = "testHCatPartitionedTable";

    writeRecords = new ArrayList<HCatRecord>();
    for (int row = 0; row < 20; row++) {
      List<Object> values = new ArrayList<Object>();
      values.add(row);
      values.add("strvalue" + row);
      writeRecords.add(new DefaultHCatRecord(values));
    }

    FieldSchema intColumn = new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "");
    FieldSchema stringColumn = new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "");

    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(intColumn));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(stringColumn));
  }
Example #6
0
  /**
   * Builds an {@link HCatSchema} from the table's columns followed by its partition keys.
   *
   * @param table the table to read columns and partition keys from
   * @return a schema containing the converted table columns, with the converted partition keys
   *     appended after them
   * @throws IOException if converting a column or partition key, or appending to the schema,
   *     fails
   */
  public static HCatSchema getTableSchemaWithPtnCols(Table table) throws IOException {
    HCatSchema combined = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols()));

    // Unpartitioned table: the data columns are the whole schema.
    if (table.getPartitionKeys().isEmpty()) {
      return combined;
    }

    // Append the partition keys after the data columns.
    // NOTE: this assumes partition keys never also appear as regular columns
    // inside the table schema.
    for (FieldSchema partitionKey : table.getPartitionKeys()) {
      combined.append(HCatSchemaUtils.getHCatFieldSchema(partitionKey));
    }
    return combined;
  }
Example #7
0
  /**
   * Checks the behavior when the order of the data columns is changed.
   *
   * <p>Writing with columns (c1, c3, c2) must fail with
   * {@code ERROR_SCHEMA_COLUMN_MISMATCH}; a subsequent write with columns (c1, c2) must
   * succeed, after which 70 rows are expected to be readable.
   */
  private void columnOrderChangeTest() throws Exception {

    HCatSchema currentSchema = getTableSchema();
    assertEquals(5, currentSchema.getFields().size());

    // --- Part 1: c2 and c3 swapped relative to the table schema; the write must be rejected.
    FieldSchema c1 = new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "");
    FieldSchema c3 = new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "");
    FieldSchema c2 = new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "");

    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(c1));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(c3));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(c2));

    writeRecords = new ArrayList<HCatRecord>();
    for (int row = 0; row < 10; row++) {
      List<Object> values = new ArrayList<Object>();
      values.add(row);
      values.add("co strvalue" + row);
      values.add("co str2value" + row);
      writeRecords.add(new DefaultHCatRecord(values));
    }

    Map<String, String> partitionValues = new HashMap<String, String>();
    partitionValues.put("part1", "p1value8");
    partitionValues.put("part0", "p0value8");

    Exception caught = null;
    try {
      runMRCreate(partitionValues, partitionColumns, writeRecords, 10, true);
    } catch (IOException ioe) {
      caught = ioe;
    }

    assertTrue(caught != null);
    assertTrue(caught instanceof HCatException);
    assertEquals(
        ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) caught).getErrorType());

    // --- Part 2: columns (c1, c2) only; the write must go through.
    FieldSchema firstColumn = new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "");
    FieldSchema secondColumn = new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "");

    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(firstColumn));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(secondColumn));

    writeRecords = new ArrayList<HCatRecord>();
    for (int row = 0; row < 10; row++) {
      List<Object> values = new ArrayList<Object>();
      values.add(row);
      values.add("co strvalue" + row);
      writeRecords.add(new DefaultHCatRecord(values));
    }

    runMRCreate(partitionValues, partitionColumns, writeRecords, 10, true);

    // Read should get 10 + 20 + 10 + 10 + 20 rows
    runMRRead(70);
  }
Example #8
0
  /**
   * Tests how writes interact with the table schema.
   *
   * <ol>
   *   <li>Writing records with an extra column c3 adds c3 to the table schema, placed before
   *       the partition columns.</li>
   *   <li>Writing with c2 declared as int instead of string fails with
   *       {@code ERROR_SCHEMA_TYPE_MISMATCH}.</li>
   *   <li>Writing records whose data also carries the partition key column part1 is expected
   *       to succeed: the rows must be readable afterwards through both partition
   *       filters.</li>
   * </ol>
   */
  private void tableSchemaTest() throws Exception {

    HCatSchema tableSchema = getTableSchema();

    assertEquals(4, tableSchema.getFields().size());

    // Update partition schema to have 3 fields
    partitionColumns.add(
        HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));

    writeRecords = new ArrayList<HCatRecord>();

    for (int i = 0; i < 20; i++) {
      List<Object> objList = new ArrayList<Object>();

      objList.add(i);
      objList.add("strvalue" + i);
      objList.add("str2value" + i);

      writeRecords.add(new DefaultHCatRecord(objList));
    }

    Map<String, String> partitionMap = new HashMap<String, String>();
    partitionMap.put("part1", "p1value5");
    partitionMap.put("part0", "p0value5");

    runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);

    tableSchema = getTableSchema();

    // assert that c3 has got added to table schema
    assertEquals(5, tableSchema.getFields().size());
    assertEquals("c1", tableSchema.getFields().get(0).getName());
    assertEquals("c2", tableSchema.getFields().get(1).getName());
    assertEquals("c3", tableSchema.getFields().get(2).getName());
    assertEquals("part1", tableSchema.getFields().get(3).getName());
    assertEquals("part0", tableSchema.getFields().get(4).getName());

    // Test that changing column data type fails
    partitionMap.clear();
    partitionMap.put("part1", "p1value6");
    partitionMap.put("part0", "p0value6");

    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(
        HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
    partitionColumns.add(
        HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("c2", serdeConstants.INT_TYPE_NAME, "")));

    IOException exc = null;
    try {
      runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
    } catch (IOException e) {
      exc = e;
    }

    assertTrue(exc != null);
    assertTrue(exc instanceof HCatException);
    assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType());

    // Test writing records whose data columns include the partition key "part1".
    // The assertions below read these rows back, so this write is expected to succeed.
    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(
        HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
    partitionColumns.add(
        HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
    partitionColumns.add(
        HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
    partitionColumns.add(
        HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("part1", serdeConstants.STRING_TYPE_NAME, "")));

    List<HCatRecord> recordsContainingPartitionCols = new ArrayList<HCatRecord>(20);
    for (int i = 0; i < 20; i++) {
      List<Object> objList = new ArrayList<Object>();

      objList.add(i);
      objList.add("c2value" + i);
      objList.add("c3value" + i);
      objList.add("p1value6");

      recordsContainingPartitionCols.add(new DefaultHCatRecord(objList));
    }

    // Let any failure propagate: swallowing it here would only resurface later as a
    // confusing row-count mismatch with the real cause lost.
    runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true);

    List<HCatRecord> records = runMRRead(20, "part1 = \"p1value6\"");
    assertEquals(20, records.size());
    records = runMRRead(20, "part0 = \"p0value6\"");
    assertEquals(20, records.size());
    int expectedRow = 0;
    for (HCatRecord rec : records) {
      // Each record: c1, c2, c3 followed by the two partition values.
      assertEquals(5, rec.size());
      assertEquals(Integer.valueOf(expectedRow), rec.get(0));
      assertEquals("c2value" + expectedRow, rec.get(1));
      assertEquals("c3value" + expectedRow, rec.get(2));
      assertEquals("p1value6", rec.get(3));
      assertEquals("p0value6", rec.get(4));
      expectedRow++;
    }
  }