Exemple #1
0
  // TODO: Every Hive-stats related test is switched off for now: Jenkins runs
  // sporadically lose the stats of some columns and the cause is still unknown.
  // Re-enable once that issue has been investigated.
  // @Test
  /**
   * Checks that applying column stats of the wrong type to a column is rejected and leaves
   * that column's stats unknown, and that applying stats of the matching type succeeds.
   */
  public void testColStatsColTypeMismatch() throws Exception {
    // Start from a table that has column stats.
    // catalog_.refreshTable("functional", "alltypesagg", false);
    HdfsTable aggTable = (HdfsTable) catalog_.getDb("functional").getTable("alltypesagg");

    MetaStoreClient msClient = catalog_.getMetaStoreClient();
    try {
      // Fetch stats that belong to a string column; they serve as the mismatched
      // payload for the non-string columns below.
      ColumnStatisticsData stringStats =
          msClient
              .getHiveClient()
              .getTableColumnStatistics("functional", "alltypesagg", "string_col")
              .getStatsObj()
              .get(0)
              .getStatsData();

      // Each non-string column must refuse the string stats and stay "unknown".
      for (String mismatchedCol : new String[] {"int_col", "double_col", "bool_col"}) {
        boolean applied = aggTable.getColumn(mismatchedCol).updateStats(stringStats);
        assertTrue(!applied);
        assertStatsUnknown(aggTable.getColumn(mismatchedCol));
      }

      // The reverse direction: bigint stats must be refused by a string column.
      ColumnStatisticsData bigIntStats =
          msClient
              .getHiveClient()
              .getTableColumnStatistics("functional", "alltypes", "bigint_col")
              .getStatsObj()
              .get(0)
              .getStatsData();
      boolean bigIntApplied = aggTable.getColumn("string_col").updateStats(bigIntStats);
      assertTrue(!bigIntApplied);
      assertStatsUnknown(aggTable.getColumn("string_col"));

      // Finally, stats of the matching type must be accepted and become visible.
      boolean matchingApplied = aggTable.getColumn("string_col").updateStats(stringStats);
      assertTrue(matchingApplied);
      assertEquals(1178, aggTable.getColumn("string_col").getStats().getNumDistinctValues());
    } finally {
      // The table's metadata should be invalidated here so that the next test does not
      // see the modified col stats.
      // catalog_.refreshTable("functional", "alltypesagg", false);
      msClient.release();
    }
  }
  /**
   * Invalidates a single table in the catalog cache, adding it to or removing it from the
   * cache depending on what the Hive Metastore reports:
   *
   * <ul>
   *   <li>Table exists in the metastore: an uninitialized IncompleteTable is added to the
   *       catalog, replacing any existing entry. Its metadata is loaded lazily on the next
   *       access. If the parent database is missing from the cache it is added as well.
   *   <li>Table does not exist in the metastore (or its parent database does not): the
   *       table is removed from the catalog cache.
   *   <li>Existence could not be determined because the metastore RPC threw: an existing
   *       cached Table is replaced by an uninitialized IncompleteTable; if nothing is
   *       cached, nothing happens.
   * </ul>
   *
   * <p>The out-parameter updatedObjects describes what was touched. Its first item is the
   * Db that was newly added by this call, or null if no database was added. Its second item
   * is the Table that was modified, added or removed.
   *
   * @param tableName database and table name of the table to invalidate
   * @param updatedObjects non-null Pair that is cleared and then filled in as described above
   * @return true if the items in updatedObjects were removed, false if they were added or
   *     modified. Only Tables are ever removed.
   */
  public boolean invalidateTable(TTableName tableName, Pair<Db, Table> updatedObjects) {
    Preconditions.checkNotNull(updatedObjects);
    updatedObjects.first = null;
    updatedObjects.second = null;

    String dbName = tableName.getDb_name();
    String tblName = tableName.getTable_name();
    LOG.debug(String.format("Invalidating table metadata: %s.%s", dbName, tblName));

    // Tri-state answer from the metastore:
    //   TRUE  - the table exists.
    //   FALSE - the table (or its parent database) does not exist.
    //   null  - unknown, the metastore client threw.
    Boolean existsInMetaStore;
    MetaStoreClient client = getMetaStoreClient();
    try {
      existsInMetaStore = client.getHiveClient().tableExists(dbName, tblName);
    } catch (UnknownDBException e) {
      // A missing parent database is handled exactly like a missing table.
      existsInMetaStore = false;
    } catch (TException e) {
      LOG.error("Error executing tableExists() metastore call: " + tblName, e);
      existsInMetaStore = null;
    } finally {
      client.release();
    }

    // Definitely gone from the metastore: drop it from the cache.
    if (Boolean.FALSE.equals(existsInMetaStore)) {
      updatedObjects.second = removeTable(dbName, tblName);
      return true;
    }

    Db db = getDb(dbName);
    boolean cachedInCatalog = db != null && db.containsTable(tblName);
    if (existsInMetaStore == null && !cachedInCatalog) {
      // Not in our cache and the metastore could not tell us anything. Do nothing.
      return false;
    }

    if (db == null) {
      // Reaching this point without a cached db implies the metastore answered TRUE (the
      // FALSE and unknown cases returned above). Create the parent database so the new
      // table has somewhere to live, and report it to the caller.
      db = new Db(dbName, this);
      db.setCatalogVersion(incrementAndGetCatalogVersion());
      addDb(db);
      updatedObjects.first = db;
    }

    // Installing a fresh uninitialized table invalidates whatever entry was there before.
    // Its metadata is loaded lazily, on the next access to the table.
    Table uninitialized =
        IncompleteTable.createUninitializedTable(getNextTableId(), db, tblName);
    uninitialized.setCatalogVersion(incrementAndGetCatalogVersion());
    db.addTable(uninitialized);
    if (loadInBackground_) {
      TTableName loadRequest = new TTableName(dbName.toLowerCase(), tblName.toLowerCase());
      tableLoadingMgr_.backgroundLoad(loadRequest);
    }
    updatedObjects.second = uninitialized;
    return false;
  }
  /**
   * Resets this catalog instance by clearing all cached table and database metadata.
   *
   * <p>If a Sentry proxy is configured, the authorization policy is refreshed first. Then,
   * while holding the catalog write lock, the table id counter is reset, a new database
   * cache is built from the databases and tables the Hive Metastore reports (every table is
   * registered as an uninitialized IncompleteTable), the non-builtin functions of the
   * previous cache are carried over, and the new cache replaces the old one. When
   * background loading is enabled, every table is then submitted for loading.
   *
   * @throws CatalogException if refreshing the authorization policy fails, or if any error
   *     occurs while rebuilding the cache (the catalog may be empty in that case).
   */
  public void reset() throws CatalogException {
    // First update the policy metadata.
    if (sentryProxy_ != null) {
      // Sentry Service is enabled.
      try {
        // Update the authorization policy, waiting for the result to complete.
        sentryProxy_.refresh();
      } catch (Exception e) {
        throw new CatalogException("Error updating authorization policy: ", e);
      }
    }

    catalogLock_.writeLock().lock();
    try {
      nextTableId_.set(0);

      // Since UDFs/UDAs are not persisted in the metastore, we won't clear
      // them across reset. To do this, we store all the functions before
      // clearing and restore them after.
      // TODO: Everything about this. Persist them.
      List<Pair<String, HashMap<String, List<Function>>>> functions = Lists.newArrayList();
      for (Db db : dbCache_.get().values()) {
        if (db.numFunctions() == 0) continue;
        functions.add(Pair.create(db.getName(), db.getAllFunctions()));
      }

      // Build a new DB cache, populate it, and replace the existing cache in one
      // step.
      ConcurrentHashMap<String, Db> newDbCache = new ConcurrentHashMap<String, Db>();
      List<TTableName> tblsToBackgroundLoad = Lists.newArrayList();
      MetaStoreClient msClient = metaStoreClientPool_.getClient();
      try {
        for (String dbName : msClient.getHiveClient().getAllDatabases()) {
          Db db = new Db(dbName, this);
          db.setCatalogVersion(incrementAndGetCatalogVersion());
          newDbCache.put(db.getName().toLowerCase(), db);

          for (String tableName : msClient.getHiveClient().getAllTables(dbName)) {
            Table incompleteTbl =
                IncompleteTable.createUninitializedTable(getNextTableId(), db, tableName);
            incompleteTbl.setCatalogVersion(incrementAndGetCatalogVersion());
            db.addTable(incompleteTbl);
            if (loadInBackground_) {
              tblsToBackgroundLoad.add(
                  new TTableName(dbName.toLowerCase(), tableName.toLowerCase()));
            }
          }
        }
      } finally {
        msClient.release();
      }

      // Restore UDFs/UDAs.
      for (Pair<String, HashMap<String, List<Function>>> dbFns : functions) {
        String fnDbName = dbFns.first;
        // ConcurrentHashMap rejects null keys; skip such an entry explicitly instead of
        // relying on a swallowed exception.
        if (fnDbName == null) continue;
        // The new cache is keyed by the lower-cased db name (see the put() above), so the
        // lookup key has to be normalized the same way. Otherwise the functions of a db
        // whose name is not already lower case would be silently dropped.
        Db db = newDbCache.get(fnDbName.toLowerCase());
        if (db == null) {
          // DB no longer exists - it was probably dropped externally.
          // TODO: We could restore this DB and then add the functions back?
          continue;
        }

        for (List<Function> fns : dbFns.second.values()) {
          for (Function fn : fns) {
            // Builtins are not carried over here; the builtins db is re-added below.
            if (fn.getBinaryType() == TFunctionBinaryType.BUILTIN) continue;
            fn.setCatalogVersion(incrementAndGetCatalogVersion());
            db.addFunction(fn);
          }
        }
      }
      dbCache_.set(newDbCache);
      addDb(builtinsDb_);
      // Submit tables for background loading.
      for (TTableName tblName : tblsToBackgroundLoad) {
        tableLoadingMgr_.backgroundLoad(tblName);
      }
    } catch (Exception e) {
      // Pass the exception as the throwable argument so the stack trace is logged, not
      // just its toString().
      LOG.error("Error initializing Catalog. Catalog may be empty.", e);
      throw new CatalogException("Error initializing Catalog. Catalog may be empty.", e);
    } finally {
      catalogLock_.writeLock().unlock();
    }
  }