Пример #1
0
  /**
   * Checks that the cache pool requested by {@code op} is the same pool the existing cache
   * directive already lives in.
   *
   * @param op caching operation carrying the requested cache pool name
   * @param directiveId ID of the existing cache directive; the directive is required to exist
   * @param table table name used to build the suggested ALTER TABLE statement
   * @param partition partition being cached, or null if no partition clause should be suggested
   * @throws ImpalaRuntimeException if the requested pool differs from the directive's pool, or
   *     if looking up the directive fails
   */
  public static void validateCachePool(
      THdfsCachingOp op, Long directiveId, TableName table, HdfsPartition partition)
      throws ImpalaRuntimeException {
    CacheDirectiveEntry entry = getDirective(directiveId);
    Preconditions.checkNotNull(entry);

    String requestedPool = op.getCache_pool_name();
    String currentPool = entry.getInfo().getPool();
    // Nothing to complain about when the pools agree.
    if (requestedPool.equals(currentPool)) return;

    String dbName = table.getDb();
    // The partition clause is only part of the hint when a partition was supplied.
    String partitionClause = "";
    if (partition != null) {
      String partitionSpec = partition.getPartitionName().replaceAll("/", ", ");
      partitionClause = String.format(" PARTITION(%s) ", partitionSpec);
    }

    // NOTE(review): the second table placeholder is filled with table's string form; verify
    // that it does not already include the database name.
    String message =
        String.format(
            "Cannot cache partition in "
                + "pool '%s' because it is already cached in '%s'. To change the cache "
                + "pool for this partition, first uncache using: ALTER TABLE %s.%s "
                + "%sSET UNCACHED",
            requestedPool,
            currentPool,
            dbName,
            table,
            partitionClause);
    throw new ImpalaRuntimeException(message);
  }
Пример #2
0
  /**
   * Tells whether applying the given thrift caching operation would modify an already existing
   * cache directive rather than leave it as is.
   *
   * <p>The directive ID is read from {@code params} under CACHE_DIR_ID_PROP_NAME. The operation
   * counts as an update only when it targets the directive's current pool and the replication
   * factor it asks for (or the default factor, when the operation sets none) differs from the
   * directive's current replication.
   *
   * @param op caching operation to compare against the existing directive
   * @param params parameter map holding the cache directive ID
   * @return true if the pool matches and the replication factor would change, false otherwise
   * @throws ImpalaRuntimeException if looking up the directive fails
   * @throws NumberFormatException if the directive ID is absent from {@code params} or is not a
   *     valid long
   */
  public static boolean isUpdateOp(THdfsCachingOp op, Map<String, String> params)
      throws ImpalaRuntimeException {
    long id = Long.parseLong(params.get(CACHE_DIR_ID_PROP_NAME));
    CacheDirectiveEntry existing = getDirective(id);
    Preconditions.checkNotNull(existing);

    // A different pool is never treated as an update of the existing directive.
    boolean samePool = op.getCache_pool_name().equals(existing.getInfo().getPool());
    if (!samePool) return false;

    // Explicit replication on the op: an update iff it differs from the current value.
    if (op.isSetReplication()) {
      return op.getReplication() != existing.getInfo().getReplication();
    }
    // No replication on the op: an update iff the current value is not the default.
    return existing.getInfo().getReplication()
        != JniCatalogConstants.HDFS_DEFAULT_CACHE_REPLICATION_FACTOR;
  }
Пример #3
0
  /**
   * Checks whether a parameter map refers to a cache directive that can still be looked up, and
   * refreshes the replication factor stored in the map.
   *
   * <p>When the map holds no directive ID the method returns false. When the lookup fails with a
   * {@code RemoteException} as its cause, the directive is treated as no longer existing: the
   * directive ID and replication entries are removed from the map, the failure is logged and
   * false is returned. Any other lookup failure is logged as a possible HDFS connectivity problem
   * and also yields false, leaving the map untouched. Nothing is written back to the Hive MS from
   * here, so the outcome only holds until the next metadata fetch. On success the replication
   * entry in the map is overwritten with the value read from the directive.
   *
   * @param params table or partition parameters; may be modified as described above
   * @return true if the directive was found and the replication entry was refreshed
   */
  public static boolean validateCacheParams(Map<String, String> params) {
    Long directiveId = getCacheDirectiveId(params);
    if (directiveId == null) return false;

    CacheDirectiveEntry entry = null;
    try {
      entry = getDirective(directiveId);
    } catch (ImpalaRuntimeException e) {
      if (!(e.getCause() instanceof RemoteException)) {
        // Anything other than a remote failure is treated as a connection problem with HDFS.
        LOG.error("IO Exception, possible connectivity issues with HDFS", e);
        return false;
      }
      // A remote failure signals that the cache directive no longer exists.
      LOG.error("Cache directive does not exist", e);
      params.remove(CACHE_DIR_ID_PROP_NAME);
      params.remove(CACHE_DIR_REPLICATION_PROP_NAME);
      return false;
    }
    Preconditions.checkNotNull(entry);

    // The replication property may be missing on the upgrade path. When it is present but
    // disagrees with the directive, the difference is logged at info level.
    String storedReplication = params.get(CACHE_DIR_REPLICATION_PROP_NAME);
    if (storedReplication != null) {
      boolean differs =
          Short.parseShort(storedReplication) != entry.getInfo().getReplication();
      if (differs) {
        StringBuilder msg =
            new StringBuilder(
                "Replication factor for entry in HDFS differs from value in Hive MS: ");
        msg.append(entry.getInfo().getPath().toString());
        msg.append(" ");
        msg.append(entry.getInfo().getReplication().toString());
        msg.append(" != ");
        msg.append(params.get(CACHE_DIR_REPLICATION_PROP_NAME));
        LOG.info(msg.toString());
      }
    }

    // The directive's value always wins.
    String currentReplication = String.valueOf(entry.getInfo().getReplication());
    params.put(CACHE_DIR_REPLICATION_PROP_NAME, currentReplication);
    return true;
  }
Пример #4
0
  /**
   * Waits on a cache directive to either complete or stop making progress. Progress is checked by
   * polling the HDFS caching stats every DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS (plus
   * 25%). We verify the request's "currentBytesCached" is increasing compared to "bytesNeeded".
   * This function returns when any of the following holds:
   *
   * <ul>
   *   <li>the directive cannot be found (lookup returns null),
   *   <li>"currentBytesCached" == "bytesNeeded",
   *   <li>no progress is made for MAX_UNCHANGED_CACHING_REFRESH_INTERVALS consecutive polls, or
   *   <li>the calling thread is interrupted while sleeping between polls; in that case the
   *       thread's interrupt status is restored before returning.
   * </ul>
   *
   * @param directiveId ID of the cache directive to wait on
   * @throws ImpalaRuntimeException if looking up the directive fails
   */
  public static void waitForDirective(long directiveId) throws ImpalaRuntimeException {
    CacheDirectiveEntry cacheDir = getDirective(directiveId);
    if (cacheDir == null) return;

    long bytesNeeded = cacheDir.getStats().getBytesNeeded();
    long currentBytesCached = cacheDir.getStats().getBytesCached();
    LOG.debug(
        String.format(
            "Waiting on cache directive id: %d. Bytes cached (%d) / needed (%d)",
            directiveId, currentBytesCached, bytesNeeded));
    // All the bytes are cached, just return.
    if (bytesNeeded == currentBytesCached) return;

    // The refresh interval is how often HDFS will update cache directive stats. We use
    // this value to determine how frequently we should poll for changes.
    long hdfsRefreshIntervalMs =
        FileSystemUtil.getConfiguration()
            .getLong(
                DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS,
                DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT);
    Preconditions.checkState(hdfsRefreshIntervalMs > 0);

    // Loop until either MAX_UNCHANGED_CACHING_REFRESH_INTERVALS have passed with no
    // changes or all required data is cached.
    int unchangedCounter = 0;
    while (unchangedCounter < MAX_UNCHANGED_CACHING_REFRESH_INTERVALS) {
      long previousBytesCached = currentBytesCached;
      cacheDir = getDirective(directiveId);
      if (cacheDir == null) return;
      currentBytesCached = cacheDir.getStats().getBytesCached();
      bytesNeeded = cacheDir.getStats().getBytesNeeded();
      if (currentBytesCached == bytesNeeded) {
        LOG.debug(
            String.format(
                "Cache directive id: %d has completed. Bytes cached (%d) / needed (%d)",
                directiveId, currentBytesCached, bytesNeeded));
        return;
      }

      if (currentBytesCached == previousBytesCached) {
        ++unchangedCounter;
      } else {
        unchangedCounter = 0;
      }
      try {
        // Sleep for the refresh interval + a little bit more to ensure a full interval
        // has completed. A value of 25% the refresh interval was arbitrarily chosen.
        Thread.sleep((long) (hdfsRefreshIntervalMs * 1.25));
      } catch (InterruptedException e) {
        // Do not lose the interrupt: restore the flag so callers can observe it, and stop
        // waiting. Continuing to loop with the flag set would make every subsequent sleep
        // fail immediately and turn this into a tight polling loop.
        Thread.currentThread().interrupt();
        LOG.warn(
            String.format(
                "Interrupted while waiting on cache directive id: %d. All data may not be "
                    + "cached. Bytes cached (%d)/needed (%d)",
                directiveId, currentBytesCached, bytesNeeded));
        return;
      }
    }
    LOG.warn(
        String.format(
            "No changes in cached bytes in: %d(ms). All data may not "
                + "be cached. Final stats for cache directive id: %d. Bytes cached (%d)/needed "
                + "(%d)",
            hdfsRefreshIntervalMs * MAX_UNCHANGED_CACHING_REFRESH_INTERVALS,
            directiveId,
            currentBytesCached,
            bytesNeeded));
  }
Пример #5
0
 /**
  * Looks up the replication factor of the cache directive with the given ID.
  *
  * @param directiveId ID of the cache directive to inspect
  * @return the directive's replication factor, or null when the lookup yields no directive
  * @throws ImpalaRuntimeException if looking up the directive fails
  */
 public static Short getCacheReplication(long directiveId) throws ImpalaRuntimeException {
   CacheDirectiveEntry directive = getDirective(directiveId);
   if (directive == null) return null;
   return directive.getInfo().getReplication();
 }
Пример #6
0
 /**
  * Looks up the name of the pool that the cache directive with the given ID belongs to.
  *
  * @param directiveId ID of the cache directive to inspect
  * @return the directive's pool name, or null when the lookup yields no directive
  * @throws ImpalaRuntimeException if looking up the directive fails
  */
 public static String getCachePool(long directiveId) throws ImpalaRuntimeException {
   CacheDirectiveEntry directive = getDirective(directiveId);
   if (directive != null) {
     return directive.getInfo().getPool();
   }
   return null;
 }