// TODO: All Hive-stats related tests are temporarily disabled because of an unknown, // sporadic issue causing stats of some columns to be absent in Jenkins runs. // Investigate this issue further. // @Test public void testColStatsColTypeMismatch() throws Exception { // First load a table that has column stats. // catalog_.refreshTable("functional", "alltypesagg", false); HdfsTable table = (HdfsTable) catalog_.getDb("functional").getTable("alltypesagg"); // Now attempt to update a column's stats with mismatched stats data and ensure // we get the expected results. MetaStoreClient client = catalog_.getMetaStoreClient(); try { // Load some string stats data and use it to update the stats of different // typed columns. ColumnStatisticsData stringColStatsData = client .getHiveClient() .getTableColumnStatistics("functional", "alltypesagg", "string_col") .getStatsObj() .get(0) .getStatsData(); assertTrue(!table.getColumn("int_col").updateStats(stringColStatsData)); assertStatsUnknown(table.getColumn("int_col")); assertTrue(!table.getColumn("double_col").updateStats(stringColStatsData)); assertStatsUnknown(table.getColumn("double_col")); assertTrue(!table.getColumn("bool_col").updateStats(stringColStatsData)); assertStatsUnknown(table.getColumn("bool_col")); // Do the same thing, but apply bigint stats to a string column. ColumnStatisticsData bigIntCol = client .getHiveClient() .getTableColumnStatistics("functional", "alltypes", "bigint_col") .getStatsObj() .get(0) .getStatsData(); assertTrue(!table.getColumn("string_col").updateStats(bigIntCol)); assertStatsUnknown(table.getColumn("string_col")); // Now try to apply a matching column stats data and ensure it succeeds. assertTrue(table.getColumn("string_col").updateStats(stringColStatsData)); assertEquals(1178, table.getColumn("string_col").getStats().getNumDistinctValues()); } finally { // Make sure to invalidate the metadata so the next test isn't using bad col stats // catalog_.refreshTable("functional", "alltypesagg", false); client.release(); } }
// TODO: All Hive-stats related tests are temporarily disabled because of an unknown, // sporadic issue causing stats of some columns to be absent in Jenkins runs. // Investigate this issue further. // @Test public void testStats() throws TableLoadingException { // make sure the stats for functional.alltypesagg look correct HdfsTable table = (HdfsTable) catalog_.getDb("functional").getTable("AllTypesAgg"); Column idCol = table.getColumn("id"); assertEquals( idCol.getStats().getAvgSerializedSize() - PrimitiveType.INT.getSlotSize(), PrimitiveType.INT.getSlotSize(), 0.0001); assertEquals(idCol.getStats().getMaxSize(), PrimitiveType.INT.getSlotSize()); assertTrue(!idCol.getStats().hasNulls()); Column boolCol = table.getColumn("bool_col"); assertEquals( boolCol.getStats().getAvgSerializedSize() - PrimitiveType.BOOLEAN.getSlotSize(), PrimitiveType.BOOLEAN.getSlotSize(), 0.0001); assertEquals(boolCol.getStats().getMaxSize(), PrimitiveType.BOOLEAN.getSlotSize()); assertTrue(!boolCol.getStats().hasNulls()); Column tinyintCol = table.getColumn("tinyint_col"); assertEquals( tinyintCol.getStats().getAvgSerializedSize() - PrimitiveType.TINYINT.getSlotSize(), PrimitiveType.TINYINT.getSlotSize(), 0.0001); assertEquals(tinyintCol.getStats().getMaxSize(), PrimitiveType.TINYINT.getSlotSize()); assertTrue(tinyintCol.getStats().hasNulls()); Column smallintCol = table.getColumn("smallint_col"); assertEquals( smallintCol.getStats().getAvgSerializedSize() - PrimitiveType.SMALLINT.getSlotSize(), PrimitiveType.SMALLINT.getSlotSize(), 0.0001); assertEquals(smallintCol.getStats().getMaxSize(), PrimitiveType.SMALLINT.getSlotSize()); assertTrue(smallintCol.getStats().hasNulls()); Column intCol = table.getColumn("int_col"); assertEquals( intCol.getStats().getAvgSerializedSize() - PrimitiveType.INT.getSlotSize(), PrimitiveType.INT.getSlotSize(), 0.0001); assertEquals(intCol.getStats().getMaxSize(), PrimitiveType.INT.getSlotSize()); assertTrue(intCol.getStats().hasNulls()); Column bigintCol = table.getColumn("bigint_col"); assertEquals( bigintCol.getStats().getAvgSerializedSize() - PrimitiveType.BIGINT.getSlotSize(), PrimitiveType.BIGINT.getSlotSize(), 0.0001); assertEquals(bigintCol.getStats().getMaxSize(), PrimitiveType.BIGINT.getSlotSize()); assertTrue(bigintCol.getStats().hasNulls()); Column floatCol = table.getColumn("float_col"); assertEquals( floatCol.getStats().getAvgSerializedSize() - PrimitiveType.FLOAT.getSlotSize(), PrimitiveType.FLOAT.getSlotSize(), 0.0001); assertEquals(floatCol.getStats().getMaxSize(), PrimitiveType.FLOAT.getSlotSize()); assertTrue(floatCol.getStats().hasNulls()); Column doubleCol = table.getColumn("double_col"); assertEquals( doubleCol.getStats().getAvgSerializedSize() - PrimitiveType.DOUBLE.getSlotSize(), PrimitiveType.DOUBLE.getSlotSize(), 0.0001); assertEquals(doubleCol.getStats().getMaxSize(), PrimitiveType.DOUBLE.getSlotSize()); assertTrue(doubleCol.getStats().hasNulls()); Column timestampCol = table.getColumn("timestamp_col"); assertEquals( timestampCol.getStats().getAvgSerializedSize() - PrimitiveType.TIMESTAMP.getSlotSize(), PrimitiveType.TIMESTAMP.getSlotSize(), 0.0001); assertEquals(timestampCol.getStats().getMaxSize(), PrimitiveType.TIMESTAMP.getSlotSize()); // this does not have nulls, it's not clear why this passes // TODO: investigate and re-enable // assertTrue(timestampCol.getStats().hasNulls()); Column stringCol = table.getColumn("string_col"); assertTrue(stringCol.getStats().getAvgSerializedSize() >= PrimitiveType.STRING.getSlotSize()); assertTrue(stringCol.getStats().getAvgSerializedSize() > 0); assertTrue(stringCol.getStats().getMaxSize() > 0); assertTrue(!stringCol.getStats().hasNulls()); }