Example #1
0
 /**
  * Builds an {@link HCatSchema} that holds only the partition columns of a table.
  *
  * <p>Every partition key reported by {@code table} is converted with
  * {@code HCatSchemaUtils.getHCatFieldSchema} and appended in the order the table returns them.
  * When the table has no partition keys the returned schema is empty.
  *
  * @param table the table whose partition keys are read
  * @return a new schema containing one field per partition key
  * @throws IOException declared by this method; presumably raised when a partition key cannot
  *     be converted into an HCat field
  */
 public static HCatSchema getPartitionColumns(Table table) throws IOException {
   HCatSchema partitionSchema = new HCatSchema(new LinkedList<HCatFieldSchema>());

   // Nothing to convert for an unpartitioned table.
   if (table.getPartitionKeys().isEmpty()) {
     return partitionSchema;
   }

   for (FieldSchema partitionKey : table.getPartitionKeys()) {
     partitionSchema.append(HCatSchemaUtils.getHCatFieldSchema(partitionKey));
   }
   return partitionSchema;
 }
Example #2
0
  /**
   * Converts each given {@link FieldSchema} into an {@link HCatFieldSchema}.
   *
   * @param fields the field schemas to convert
   * @return a new list with one converted entry per input field, in input order
   * @throws HCatException declared by this method; presumably propagated from the per-field
   *     conversion
   */
  public static List<HCatFieldSchema> getHCatFieldSchemaList(FieldSchema... fields)
      throws HCatException {
    final int count = fields.length;
    List<HCatFieldSchema> converted = new ArrayList<HCatFieldSchema>(count);

    for (int idx = 0; idx < count; idx++) {
      converted.add(HCatSchemaUtils.getHCatFieldSchema(fields[idx]));
    }

    return converted;
  }
Example #3
0
 /**
  * Converts a list of {@link HCatFieldSchema} into the corresponding {@link FieldSchema} list.
  *
  * @param hcatFields the HCat fields to convert; may be {@code null}
  * @return {@code null} when {@code hcatFields} is {@code null}, otherwise a new list holding
  *     the converted fields in the same order
  */
 public static List<FieldSchema> getFieldSchemaList(List<HCatFieldSchema> hcatFields) {
   // A null input is passed through as a null result.
   if (hcatFields == null) {
     return null;
   }

   List<FieldSchema> converted = new ArrayList<FieldSchema>();
   for (HCatFieldSchema hcatField : hcatFields) {
     converted.add(HCatSchemaUtils.getFieldSchema(hcatField));
   }
   return converted;
 }
Example #4
0
 /**
  * Converts a list of {@link FieldSchema} into the corresponding {@link HCatFieldSchema} list.
  *
  * @param fields the field schemas to convert; may be {@code null}
  * @return {@code null} when {@code fields} is {@code null}, otherwise a new list holding the
  *     converted fields in the same order
  * @throws HCatException declared by this method; presumably propagated from the per-field
  *     conversion
  */
 public static List<HCatFieldSchema> getHCatFieldSchemaList(List<FieldSchema> fields)
     throws HCatException {
   // A null input is passed through as a null result.
   if (fields == null) {
     return null;
   }

   List<HCatFieldSchema> converted = new ArrayList<HCatFieldSchema>();
   for (FieldSchema field : fields) {
     converted.add(HCatSchemaUtils.getHCatFieldSchema(field));
   }
   return converted;
 }
 // Populates the column fixtures for the three test tables:
 //   test1 -> key, value
 //   test2 -> key
 //   test3 -> key, value, extra
 // Each table gets a FieldSchema list and a parallel list of converted HCat fields; the
 // FieldSchema lists are then registered in colMapping under the table name.
 static {
   try {
     FieldSchema key = new FieldSchema("key", serdeConstants.STRING_TYPE_NAME, "");
     FieldSchema value = new FieldSchema("value", serdeConstants.STRING_TYPE_NAME, "");
     FieldSchema extra = new FieldSchema("extra", serdeConstants.STRING_TYPE_NAME, "");

     // test1: key, value
     test1Cols.add(key);
     test1Cols.add(value);
     hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(key));
     hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(value));

     // test2: key
     test2Cols.add(key);
     hCattest2Cols.add(HCatSchemaUtils.getHCatFieldSchema(key));

     // test3: key, value, extra
     test3Cols.add(key);
     test3Cols.add(value);
     test3Cols.add(extra);
     hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(key));
     hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(value));
     hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(extra));

     colMapping.put("test1", test1Cols);
     colMapping.put("test2", test2Cols);
     colMapping.put("test3", test3Cols);
   } catch (HCatException e) {
     // A broken fixture makes the whole class unusable, so fail class initialization.
     LOG.error("Error in setting up schema fields for the table", e);
     throw new RuntimeException(e);
   }
 }
Example #6
0
  /**
   * One-time fixture setup for the class.
   *
   * <p>Sets the table name, fills {@code writeRecords} with 20 two-value records
   * ({@code i}, {@code "strvalue" + i}) and sets {@code partitionColumns} to the HCat fields for
   * an int column {@code c1} and a string column {@code c2}.
   */
  @BeforeClass
  public static void oneTimeSetUp() throws Exception {
    tableName = "testHCatPartitionedTable";

    writeRecords = new ArrayList<HCatRecord>();
    for (int row = 0; row < 20; row++) {
      List<Object> values = new ArrayList<Object>();
      values.add(row);
      values.add("strvalue" + row);
      writeRecords.add(new DefaultHCatRecord(values));
    }

    partitionColumns = new ArrayList<HCatFieldSchema>();
    FieldSchema intColumn = new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "");
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(intColumn));
    FieldSchema stringColumn = new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "");
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(stringColumn));
  }
Example #7
0
  /**
   * Builds the full schema of a table: its regular columns followed by its partition keys.
   *
   * @param table the table to read columns and partition keys from
   * @return a new schema with the converted table columns first and the converted partition keys
   *     appended after them
   * @throws IOException declared by this method; presumably raised when a column or partition
   *     key cannot be converted into an HCat field
   */
  public static HCatSchema getTableSchemaWithPtnCols(Table table) throws IOException {
    HCatSchema combined = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols()));

    // Unpartitioned table: the column schema is already complete.
    if (table.getPartitionKeys().isEmpty()) {
      return combined;
    }

    // Partition keys are appended without any duplicate check, so this relies on partition
    // keys never also appearing among the regular table columns.
    for (FieldSchema ptnKey : table.getPartitionKeys()) {
      combined.append(HCatSchemaUtils.getHCatFieldSchema(ptnKey));
    }
    return combined;
  }
Example #8
0
  /**
   * Checks how a write behaves when the column order of the written schema changes.
   *
   * <p>Phase 1 writes three-value records with the schema {@code c1, c3, c2} and expects the
   * write to fail with {@code ERROR_SCHEMA_COLUMN_MISMATCH}. Phase 2 writes two-value records
   * with the schema {@code c1, c2} using the same partition values and then expects 70 rows to
   * be readable in total.
   */
  private void columnOrderChangeTest() throws Exception {

    assertEquals(5, getTableSchema().getFields().size());

    // Phase 1: schema listing c3 before c2.
    partitionColumns = new ArrayList<HCatFieldSchema>();
    FieldSchema c1 = new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "");
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(c1));
    FieldSchema c3 = new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "");
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(c3));
    FieldSchema c2 = new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "");
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(c2));

    writeRecords = new ArrayList<HCatRecord>();
    for (int row = 0; row < 10; row++) {
      List<Object> values = new ArrayList<Object>();
      values.add(row);
      values.add("co strvalue" + row);
      values.add("co str2value" + row);
      writeRecords.add(new DefaultHCatRecord(values));
    }

    Map<String, String> partitionMap = new HashMap<String, String>();
    partitionMap.put("part1", "p1value8");
    partitionMap.put("part0", "p0value8");

    Exception mismatchFailure = null;
    try {
      runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
    } catch (IOException e) {
      mismatchFailure = e;
    }

    // The reordered schema must be rejected as a column mismatch.
    assertTrue(mismatchFailure != null);
    assertTrue(mismatchFailure instanceof HCatException);
    assertEquals(
        ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH,
        ((HCatException) mismatchFailure).getErrorType());

    // Phase 2: schema with only c1 and c2, written with the same partition values.
    partitionColumns = new ArrayList<HCatFieldSchema>();
    FieldSchema firstColumn = new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "");
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(firstColumn));
    FieldSchema secondColumn = new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "");
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(secondColumn));

    writeRecords = new ArrayList<HCatRecord>();
    for (int row = 0; row < 10; row++) {
      List<Object> values = new ArrayList<Object>();
      values.add(row);
      values.add("co strvalue" + row);
      writeRecords.add(new DefaultHCatRecord(values));
    }

    runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);

    // Expected total is 10 + 20 + 10 + 10 + 20 = 70 rows; the individual counts presumably come
    // from this write plus writes made by tests that ran earlier.
    runMRRead(70);
  }
Example #9
0
  /**
   * Tests that a new column written with a partition gets added to the table schema, and
   * exercises two follow-up writes with altered schemas.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Expects the table schema to start with 4 fields, appends {@code c3} to the shared
   *       {@code partitionColumns}, writes three-value records and expects the table schema to
   *       become {@code c1, c2, c3, part1, part0}.
   *   <li>Writes with {@code c2} declared as int and expects {@code ERROR_SCHEMA_TYPE_MISMATCH}.
   *   <li>Writes records whose schema also contains {@code part1}, then reads back by
   *       {@code part1} and by {@code part0} and checks the five values of each record.
   * </ol>
   *
   * <p>This method appends to the existing {@code partitionColumns} field in step 1, so it
   * depends on the state that field has when the method is entered.
   */
  // test that new columns gets added to table schema
  private void tableSchemaTest() throws Exception {

    HCatSchema tableSchema = getTableSchema();

    assertEquals(4, tableSchema.getFields().size());

    // Update partition schema to have 3 fields
    partitionColumns.add(
        HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));

    // Replace the shared records with 20 three-value records matching the widened schema.
    writeRecords = new ArrayList<HCatRecord>();

    for (int i = 0; i < 20; i++) {
      List<Object> objList = new ArrayList<Object>();

      objList.add(i);
      objList.add("strvalue" + i);
      objList.add("str2value" + i);

      writeRecords.add(new DefaultHCatRecord(objList));
    }

    Map<String, String> partitionMap = new HashMap<String, String>();
    partitionMap.put("part1", "p1value5");
    partitionMap.put("part0", "p0value5");

    runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);

    // Re-read the schema after the write to observe the added column.
    tableSchema = getTableSchema();

    // assert that c3 has got added to table schema
    assertEquals(5, tableSchema.getFields().size());
    assertEquals("c1", tableSchema.getFields().get(0).getName());
    assertEquals("c2", tableSchema.getFields().get(1).getName());
    assertEquals("c3", tableSchema.getFields().get(2).getName());
    assertEquals("part1", tableSchema.getFields().get(3).getName());
    assertEquals("part0", tableSchema.getFields().get(4).getName());

    // Test that changing column data type fails
    partitionMap.clear();
    partitionMap.put("part1", "p1value6");
    partitionMap.put("part0", "p0value6");

    // Same column names as before, but c2 is now declared as int instead of string.
    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(
        HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
    partitionColumns.add(
        HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("c2", serdeConstants.INT_TYPE_NAME, "")));

    IOException exc = null;
    try {
      runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
    } catch (IOException e) {
      exc = e;
    }

    // The write must have failed with a type-mismatch HCatException.
    assertTrue(exc != null);
    assertTrue(exc instanceof HCatException);
    assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType());

    // Write records whose schema also lists the partition key part1 as a data column.
    // NOTE(review): the original comment here read "Test that partition key is not allowed in
    // data", but the exception captured below is never asserted and the reads that follow
    // expect all 20 of these records to be present - confirm which behavior is intended.
    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(
        HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
    partitionColumns.add(
        HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
    partitionColumns.add(
        HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
    partitionColumns.add(
        HCatSchemaUtils.getHCatFieldSchema(
            new FieldSchema("part1", serdeConstants.STRING_TYPE_NAME, "")));

    // Four values per record: c1, c2, c3 and the part1 value, which equals the partitionMap entry.
    List<HCatRecord> recordsContainingPartitionCols = new ArrayList<HCatRecord>(20);
    for (int i = 0; i < 20; i++) {
      List<Object> objList = new ArrayList<Object>();

      objList.add(i);
      objList.add("c2value" + i);
      objList.add("c3value" + i);
      objList.add("p1value6");

      recordsContainingPartitionCols.add(new DefaultHCatRecord(objList));
    }

    // Any IOException from this write is captured but not inspected afterwards.
    exc = null;
    try {
      runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true);
    } catch (IOException e) {
      exc = e;
    }

    // Read back through each partition filter; both are expected to return the 20 records.
    List<HCatRecord> records = runMRRead(20, "part1 = \"p1value6\"");
    assertEquals(20, records.size());
    records = runMRRead(20, "part0 = \"p0value6\"");
    assertEquals(20, records.size());
    // The checks below assume records come back in the order they were written (i = 0..19).
    Integer i = 0;
    for (HCatRecord rec : records) {
      assertEquals(5, rec.size());
      assertTrue(rec.get(0).equals(i));
      assertTrue(rec.get(1).equals("c2value" + i));
      assertTrue(rec.get(2).equals("c3value" + i));
      assertTrue(rec.get(3).equals("p1value6"));
      assertTrue(rec.get(4).equals("p0value6"));
      i++;
    }
  }
Example #10
0
  /**
   * Validates a partition schema against the schema of an existing table and returns the
   * columns that are present in the partition but not in the table.
   *
   * <p>Fields are compared by position. For every position that exists among the table's
   * columns, the partition field must have the same name (ignoring case) and the same type as
   * the table column. Fields positioned after the table's last column are treated as newly
   * added columns; such a field must not be named like one of the table's partition keys.
   *
   * <p>Partition key names are matched case-insensitively using {@code Locale.ROOT}, so the
   * result does not depend on the JVM's default locale.
   *
   * @param table the table
   * @param partitionSchema the partition schema
   * @return the list of newly added fields, in partition schema order
   * @throws IOException an {@link HCatException} with {@code ERROR_SCHEMA_COLUMN_MISMATCH} when
   *     a field name differs from the table column at the same position, with
   *     {@code ERROR_SCHEMA_PARTITION_KEY} when an added field is named like a partition key,
   *     or with {@code ERROR_SCHEMA_TYPE_MISMATCH} when the type of an existing column differs
   */
  public static List<FieldSchema> validatePartitionSchema(Table table, HCatSchema partitionSchema)
      throws IOException {
    // Index partition keys by lower-cased name. Locale.ROOT keeps the lower-casing identical
    // under every default locale (e.g. "I" must not become a dotless i under a Turkish locale).
    Map<String, FieldSchema> partitionKeyMap = new HashMap<String, FieldSchema>();
    for (FieldSchema field : table.getPartitionKeys()) {
      partitionKeyMap.put(field.getName().toLowerCase(java.util.Locale.ROOT), field);
    }

    List<FieldSchema> tableCols = table.getCols();
    List<FieldSchema> newFields = new ArrayList<FieldSchema>();

    for (int i = 0; i < partitionSchema.getFields().size(); i++) {
      FieldSchema field = HCatSchemaUtils.getFieldSchema(partitionSchema.getFields().get(i));

      if (i >= tableCols.size()) {
        // Past the table's columns: this is a new column, unless it collides with a partition key.
        if (partitionKeyMap.containsKey(field.getName().toLowerCase(java.util.Locale.ROOT))) {
          throw new HCatException(
              ErrorType.ERROR_SCHEMA_PARTITION_KEY, "Key <" + field.getName() + ">");
        }
        newFields.add(field);
        continue;
      }

      // Existing position: the name must match the table column at the same position.
      FieldSchema tableField = tableCols.get(i);
      if (!tableField.getName().equalsIgnoreCase(field.getName())) {
        throw new HCatException(
            ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH,
            "Expected column <"
                + tableField.getName()
                + "> at position "
                + (i + 1)
                + ", found column <"
                + field.getName()
                + ">");
      }

      // Field present in both: validate that the type has not changed.
      TypeInfo partitionType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
      TypeInfo tableType = TypeInfoUtils.getTypeInfoFromTypeString(tableField.getType());
      if (!partitionType.equals(tableType)) {
        throw new HCatException(
            ErrorType.ERROR_SCHEMA_TYPE_MISMATCH,
            "Column <"
                + field.getName()
                + ">, expected <"
                + tableType.getTypeName()
                + ">, got <"
                + partitionType.getTypeName()
                + ">");
      }
    }

    return newFields;
  }