Esempio n. 1
0
  // TODO: All Hive-stats related tests are temporarily disabled because of an unknown,
  // sporadic issue causing stats of some columns to be absent in Jenkins runs.
  // Investigate this issue further.
  // @Test
  public void testColStatsColTypeMismatch() throws Exception {
    // Verifies that Column.updateStats() rejects stats data whose type does not match
    // the column's type (leaving the column's stats unknown), and accepts matching data.
    final String dbName = "functional";
    final String tblName = "alltypesagg";

    // Start from a table that already carries column stats.
    // catalog_.refreshTable("functional", "alltypesagg", false);
    HdfsTable table = (HdfsTable) catalog_.getDb(dbName).getTable(tblName);

    // The metastore client is released in the finally block below.
    MetaStoreClient msClient = catalog_.getMetaStoreClient();
    try {
      // Fetch the stats recorded for a string column; they are applied below to
      // columns of other types.
      ColumnStatisticsData stringStats =
          msClient
              .getHiveClient()
              .getTableColumnStatistics(dbName, tblName, "string_col")
              .getStatsObj()
              .get(0)
              .getStatsData();

      // String stats must be rejected by each non-string column, and the column's
      // stats must read as unknown afterwards.
      for (String mismatchedCol : new String[] {"int_col", "double_col", "bool_col"}) {
        assertTrue(!table.getColumn(mismatchedCol).updateStats(stringStats));
        assertStatsUnknown(table.getColumn(mismatchedCol));
      }

      // Reverse direction: bigint stats (read from functional.alltypes) applied to a
      // string column must be rejected as well.
      ColumnStatisticsData bigIntStats =
          msClient
              .getHiveClient()
              .getTableColumnStatistics(dbName, "alltypes", "bigint_col")
              .getStatsObj()
              .get(0)
              .getStatsData();
      boolean bigIntApplied = table.getColumn("string_col").updateStats(bigIntStats);
      assertTrue(!bigIntApplied);
      assertStatsUnknown(table.getColumn("string_col"));

      // Finally, stats of the matching type must be accepted and become visible.
      boolean stringApplied = table.getColumn("string_col").updateStats(stringStats);
      assertTrue(stringApplied);
      assertEquals(1178, table.getColumn("string_col").getStats().getNumDistinctValues());
    } finally {
      // Make sure to invalidate the metadata so the next test isn't using bad col stats
      // catalog_.refreshTable("functional", "alltypesagg", false);
      msClient.release();
    }
  }
Esempio n. 2
0
  // TODO: All Hive-stats related tests are temporarily disabled because of an unknown,
  // sporadic issue causing stats of some columns to be absent in Jenkins runs.
  // Investigate this issue further.
  // @Test
  public void testStats() throws TableLoadingException {
    // Checks that the column stats loaded for functional.alltypesagg look correct:
    // size stats for every fixed-width column, and null presence where it is asserted.
    HdfsTable table = (HdfsTable) catalog_.getDb("functional").getTable("AllTypesAgg");

    // Columns expected to contain no nulls.
    Column idCol = table.getColumn("id");
    assertFixedWidthColStats(idCol, PrimitiveType.INT);
    assertTrue(!idCol.getStats().hasNulls());

    Column boolCol = table.getColumn("bool_col");
    assertFixedWidthColStats(boolCol, PrimitiveType.BOOLEAN);
    assertTrue(!boolCol.getStats().hasNulls());

    // Columns expected to contain nulls.
    Column tinyintCol = table.getColumn("tinyint_col");
    assertFixedWidthColStats(tinyintCol, PrimitiveType.TINYINT);
    assertTrue(tinyintCol.getStats().hasNulls());

    Column smallintCol = table.getColumn("smallint_col");
    assertFixedWidthColStats(smallintCol, PrimitiveType.SMALLINT);
    assertTrue(smallintCol.getStats().hasNulls());

    Column intCol = table.getColumn("int_col");
    assertFixedWidthColStats(intCol, PrimitiveType.INT);
    assertTrue(intCol.getStats().hasNulls());

    Column bigintCol = table.getColumn("bigint_col");
    assertFixedWidthColStats(bigintCol, PrimitiveType.BIGINT);
    assertTrue(bigintCol.getStats().hasNulls());

    Column floatCol = table.getColumn("float_col");
    assertFixedWidthColStats(floatCol, PrimitiveType.FLOAT);
    assertTrue(floatCol.getStats().hasNulls());

    Column doubleCol = table.getColumn("double_col");
    assertFixedWidthColStats(doubleCol, PrimitiveType.DOUBLE);
    assertTrue(doubleCol.getStats().hasNulls());

    Column timestampCol = table.getColumn("timestamp_col");
    assertFixedWidthColStats(timestampCol, PrimitiveType.TIMESTAMP);
    // this does not have nulls, it's not clear why this passes
    // TODO: investigate and re-enable
    // assertTrue(timestampCol.getStats().hasNulls());

    // Strings are variable-length, so only lower bounds on the sizes are asserted.
    Column stringCol = table.getColumn("string_col");
    assertTrue(stringCol.getStats().getAvgSerializedSize() >= PrimitiveType.STRING.getSlotSize());
    assertTrue(stringCol.getStats().getAvgSerializedSize() > 0);
    assertTrue(stringCol.getStats().getMaxSize() > 0);
    assertTrue(!stringCol.getStats().hasNulls());
  }

  /**
   * Asserts the size-related stats of a fixed-width column against the slot size of
   * its type.
   *
   * <p>Two conditions are checked, both with the expected value passed first as JUnit's
   * {@code assertEquals} requires, so a failure message reports the values the right way
   * round:
   * <ul>
   *   <li>the average serialized size minus the slot size equals the slot size (within
   *       0.0001), i.e. the average serialized size is twice the slot size;
   *   <li>the max size equals the slot size.
   * </ul>
   *
   * @param col column whose stats are inspected
   * @param type primitive type whose slot size is the expected value
   */
  private static void assertFixedWidthColStats(Column col, PrimitiveType type) {
    assertEquals(
        type.getSlotSize(),
        col.getStats().getAvgSerializedSize() - type.getSlotSize(),
        0.0001);
    assertEquals(type.getSlotSize(), col.getStats().getMaxSize());
  }