// TODO: All Hive-stats related tests are temporarily disabled because of an unknown, // sporadic issue causing stats of some columns to be absent in Jenkins runs. // Investigate this issue further. // @Test public void testColStatsColTypeMismatch() throws Exception { // First load a table that has column stats. // catalog_.refreshTable("functional", "alltypesagg", false); HdfsTable table = (HdfsTable) catalog_.getDb("functional").getTable("alltypesagg"); // Now attempt to update a column's stats with mismatched stats data and ensure // we get the expected results. MetaStoreClient client = catalog_.getMetaStoreClient(); try { // Load some string stats data and use it to update the stats of different // typed columns. ColumnStatisticsData stringColStatsData = client .getHiveClient() .getTableColumnStatistics("functional", "alltypesagg", "string_col") .getStatsObj() .get(0) .getStatsData(); assertTrue(!table.getColumn("int_col").updateStats(stringColStatsData)); assertStatsUnknown(table.getColumn("int_col")); assertTrue(!table.getColumn("double_col").updateStats(stringColStatsData)); assertStatsUnknown(table.getColumn("double_col")); assertTrue(!table.getColumn("bool_col").updateStats(stringColStatsData)); assertStatsUnknown(table.getColumn("bool_col")); // Do the same thing, but apply bigint stats to a string column. ColumnStatisticsData bigIntCol = client .getHiveClient() .getTableColumnStatistics("functional", "alltypes", "bigint_col") .getStatsObj() .get(0) .getStatsData(); assertTrue(!table.getColumn("string_col").updateStats(bigIntCol)); assertStatsUnknown(table.getColumn("string_col")); // Now try to apply a matching column stats data and ensure it succeeds. assertTrue(table.getColumn("string_col").updateStats(stringColStatsData)); assertEquals(1178, table.getColumn("string_col").getStats().getNumDistinctValues()); } finally { // Make sure to invalidate the metadata so the next test isn't using bad col stats // catalog_.refreshTable("functional", "alltypesagg", false); client.release(); } }
/** * Invalidates the table in the catalog cache, potentially adding/removing the table from the * cache based on whether it exists in the Hive Metastore. The invalidation logic is: - If the * table exists in the metastore, add it to the catalog as an uninitialized IncompleteTable * (replacing any existing entry). The table metadata will be loaded lazily, on the next access. * If the parent database for this table does not yet exist in Impala's cache it will also be * added. - If the table does not exist in the metastore, remove it from the catalog cache. - If * we are unable to determine whether the table exists in the metastore (there was an exception * thrown making the RPC), invalidate any existing Table by replacing it with an uninitialized * IncompleteTable. * * <p>The parameter updatedObjects is a Pair that contains details on what catalog objects were * modified as a result of the invalidateTable() call. The first item in the Pair is a Db which * will only be set if a new database was added as a result of this call, otherwise it will be * null. The second item in the Pair is the Table that was modified/added/removed. Returns a flag * that indicates whether the items in updatedObjects were removed (returns true) or * added/modified (return false). Only Tables should ever be removed. */ public boolean invalidateTable(TTableName tableName, Pair<Db, Table> updatedObjects) { Preconditions.checkNotNull(updatedObjects); updatedObjects.first = null; updatedObjects.second = null; LOG.debug( String.format( "Invalidating table metadata: %s.%s", tableName.getDb_name(), tableName.getTable_name())); String dbName = tableName.getDb_name(); String tblName = tableName.getTable_name(); // Stores whether the table exists in the metastore. Can have three states: // 1) true - Table exists in metastore. // 2) false - Table does not exist in metastore. // 3) unknown (null) - There was exception thrown by the metastore client. Boolean tableExistsInMetaStore; MetaStoreClient msClient = getMetaStoreClient(); try { tableExistsInMetaStore = msClient.getHiveClient().tableExists(dbName, tblName); } catch (UnknownDBException e) { // The parent database does not exist in the metastore. Treat this the same // as if the table does not exist. tableExistsInMetaStore = false; } catch (TException e) { LOG.error("Error executing tableExists() metastore call: " + tblName, e); tableExistsInMetaStore = null; } finally { msClient.release(); } if (tableExistsInMetaStore != null && !tableExistsInMetaStore) { updatedObjects.second = removeTable(dbName, tblName); return true; } else { Db db = getDb(dbName); if ((db == null || !db.containsTable(tblName)) && tableExistsInMetaStore == null) { // The table does not exist in our cache AND it is unknown whether the table // exists in the metastore. Do nothing. return false; } else if (db == null && tableExistsInMetaStore) { // The table exists in the metastore, but our cache does not contain the parent // database. A new db will be added to the cache along with the new table. db = new Db(dbName, this); db.setCatalogVersion(incrementAndGetCatalogVersion()); addDb(db); updatedObjects.first = db; } // Add a new uninitialized table to the table cache, effectively invalidating // any existing entry. The metadata for the table will be loaded lazily, on the // on the next access to the table. Table newTable = IncompleteTable.createUninitializedTable(getNextTableId(), db, tblName); newTable.setCatalogVersion(incrementAndGetCatalogVersion()); db.addTable(newTable); if (loadInBackground_) { tableLoadingMgr_.backgroundLoad( new TTableName(dbName.toLowerCase(), tblName.toLowerCase())); } updatedObjects.second = newTable; return false; } }
/** Resets this catalog instance by clearing all cached table and database metadata. */ public void reset() throws CatalogException { // First update the policy metadata. if (sentryProxy_ != null) { // Sentry Service is enabled. try { // Update the authorization policy, waiting for the result to complete. sentryProxy_.refresh(); } catch (Exception e) { throw new CatalogException("Error updating authorization policy: ", e); } } catalogLock_.writeLock().lock(); try { nextTableId_.set(0); // Since UDFs/UDAs are not persisted in the metastore, we won't clear // them across reset. To do this, we store all the functions before // clearing and restore them after. // TODO: Everything about this. Persist them. List<Pair<String, HashMap<String, List<Function>>>> functions = Lists.newArrayList(); for (Db db : dbCache_.get().values()) { if (db.numFunctions() == 0) continue; functions.add(Pair.create(db.getName(), db.getAllFunctions())); } // Build a new DB cache, populate it, and replace the existing cache in one // step. ConcurrentHashMap<String, Db> newDbCache = new ConcurrentHashMap<String, Db>(); List<TTableName> tblsToBackgroundLoad = Lists.newArrayList(); MetaStoreClient msClient = metaStoreClientPool_.getClient(); try { for (String dbName : msClient.getHiveClient().getAllDatabases()) { Db db = new Db(dbName, this); db.setCatalogVersion(incrementAndGetCatalogVersion()); newDbCache.put(db.getName().toLowerCase(), db); for (String tableName : msClient.getHiveClient().getAllTables(dbName)) { Table incompleteTbl = IncompleteTable.createUninitializedTable(getNextTableId(), db, tableName); incompleteTbl.setCatalogVersion(incrementAndGetCatalogVersion()); db.addTable(incompleteTbl); if (loadInBackground_) { tblsToBackgroundLoad.add( new TTableName(dbName.toLowerCase(), tableName.toLowerCase())); } } } } finally { msClient.release(); } // Restore UDFs/UDAs. for (Pair<String, HashMap<String, List<Function>>> dbFns : functions) { Db db = null; try { db = newDbCache.get(dbFns.first); } catch (Exception e) { continue; } if (db == null) { // DB no longer exists - it was probably dropped externally. // TODO: We could restore this DB and then add the functions back? continue; } for (List<Function> fns : dbFns.second.values()) { for (Function fn : fns) { if (fn.getBinaryType() == TFunctionBinaryType.BUILTIN) continue; fn.setCatalogVersion(incrementAndGetCatalogVersion()); db.addFunction(fn); } } } dbCache_.set(newDbCache); addDb(builtinsDb_); // Submit tables for background loading. for (TTableName tblName : tblsToBackgroundLoad) { tableLoadingMgr_.backgroundLoad(tblName); } } catch (Exception e) { LOG.error(e); throw new CatalogException("Error initializing Catalog. Catalog may be empty.", e); } finally { catalogLock_.writeLock().unlock(); } }