Example #1
  /**
   * Returns the first location (HDFS path) that Impala does not have WRITE access to, or null if
   * none is found. For an unpartitioned table, this just checks the hdfsBaseDir. For a partitioned
   * table it checks all partition directories.
   */
  public String getFirstLocationWithoutWriteAccess() {
    if (getMetaStoreTable() == null) return null;

    if (getMetaStoreTable().getPartitionKeysSize() == 0) {
      if (!TAccessLevelUtil.impliesWriteAccess(accessLevel_)) {
        return hdfsBaseDir_;
      }
    } else {
      for (HdfsPartition partition : partitions_) {
        if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
          return partition.getLocation();
        }
      }
    }
    return null;
  }
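
A minimal sketch of how a caller might use this method when validating an INSERT
target. The method name checkInsertTarget and the error-handling style are
illustrative assumptions, not part of the original class:

  // Hypothetical caller: reject a write when any target location is unwritable.
  // checkInsertTarget is an assumed name, not taken from the original source.
  void checkInsertTarget(HdfsTable table) throws AnalysisException {
    String badLocation = table.getFirstLocationWithoutWriteAccess();
    if (badLocation != null) {
      throw new AnalysisException(
          "Impala does not have WRITE access to HDFS location: " + badLocation);
    }
  }
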
Example #2
  /**
   * Create HdfsPartition objects corresponding to 'partitions'.
   *
   * <p>If there are no partitions in the Hive metadata, a single partition is added with no
   * partition keys.
   *
   * <p>For files that have not been changed, reuses file descriptors from oldFileDescMap.
   */
  private void loadPartitions(
      List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions,
      org.apache.hadoop.hive.metastore.api.Table msTbl,
      Map<String, FileDescriptor> oldFileDescMap)
      throws IOException, CatalogException {
    partitions_.clear();
    hdfsBaseDir_ = msTbl.getSd().getLocation();
    List<FileDescriptor> newFileDescs = Lists.newArrayList();

    // INSERT statements need to refer to this if they try to write to new partitions.
    // Scans don't refer to it because, by definition, all partitions they refer to
    // exist.
    addDefaultPartition(msTbl.getSd());

    if (msTbl.getPartitionKeysSize() == 0) {
      Preconditions.checkArgument(msPartitions == null || msPartitions.isEmpty());
      // This table has no partition key, which means it has no declared partitions.
      // We model partitions slightly differently from Hive: every file must exist in a
      // partition, so add a single partition with no keys, which will get all the
      // files in the table's root directory.
      addPartition(msTbl.getSd(), null, new ArrayList<LiteralExpr>(), oldFileDescMap, newFileDescs);
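      // Determine the table's access level from its base directory, but only if
      // the directory actually exists in HDFS.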
      Path location = new Path(hdfsBaseDir_);
      if (DFS.exists(location)) {
        accessLevel_ = getAvailableAccessLevel(location);
      }
    } else {
      // keep track of distinct partition key values and how many nulls there are
      Set<String>[] uniquePartitionKeys = new HashSet[numClusteringCols_];
      long[] numNullKeys = new long[numClusteringCols_];
      for (int i = 0; i < numClusteringCols_; ++i) {
        uniquePartitionKeys[i] = new HashSet<String>();
        numNullKeys[i] = 0;
      }

      for (org.apache.hadoop.hive.metastore.api.Partition msPartition : msPartitions) {
        // load key values
        List<LiteralExpr> keyValues = Lists.newArrayList();
        int i = 0;
        for (String partitionKey : msPartition.getValues()) {
          uniquePartitionKeys[i].add(partitionKey);
          // Deal with Hive's special NULL partition key.
          if (partitionKey.equals(nullPartitionKeyValue_)) {
            keyValues.add(new NullLiteral());
            ++numNullKeys[i];
          } else {
            ColumnType type = colsByPos_.get(keyValues.size()).getType();
            try {
              Expr expr = LiteralExpr.create(partitionKey, type);
              // Force the literal to be of type declared in the metadata.
              expr = expr.castTo(type);
              keyValues.add((LiteralExpr) expr);
            } catch (AnalysisException ex) {
              LOG.warn("Failed to create literal expression of type: " + type, ex);
              throw new InvalidStorageDescriptorException(ex);
            }
          }
          ++i;
        }
        HdfsPartition partition =
            addPartition(msPartition.getSd(), msPartition, keyValues, oldFileDescMap, newFileDescs);
        // A null return means the partition's HDFS path does not exist and the
        // partition was not added to this table's partition list. Skip it.
        if (partition == null) continue;

        if (msPartition.getParameters() != null) {
          partition.setNumRows(getRowCount(msPartition.getParameters()));
        }
        if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
          // TODO: READ_ONLY isn't exactly correct because it's possible the
          // partition does not have READ permissions either. When we start checking
          // whether we can READ from a table, this should be updated to set the
          // table's access level to the "lowest" effective level across all
          // partitions. That is, if one partition has READ_ONLY and another has
          // WRITE_ONLY the table's access level should be NONE.
          accessLevel_ = TAccessLevel.READ_ONLY;
        }
      }

      // update col stats for partition key cols
      for (int i = 0; i < numClusteringCols_; ++i) {
        ColumnStats stats = colsByPos_.get(i).getStats();
        stats.setNumNulls(numNullKeys[i]);
        stats.setNumDistinctValues(uniquePartitionKeys[i].size());
        LOG.debug("#col=" + i + " stats=" + stats);
      }
    }

    if (newFileDescs.size() > 0) {
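      // Load HDFS block metadata only for newly created file descriptors;
      // unchanged files reuse their descriptors from oldFileDescMap.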
      loadBlockMd(newFileDescs);
    }
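    // Cache the number of distinct network locations (host/port pairs) that store
    // this table's data blocks.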
    uniqueHostPortsCount_ = countUniqueDataNetworkLocations(partitions_);
  }
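
Both examples above hinge on TAccessLevelUtil.impliesWriteAccess, whose source is
not shown here. A plausible sketch, assuming TAccessLevel is an enum with the
levels the comments mention (NONE, READ_ONLY, WRITE_ONLY) plus a READ_WRITE level;
the real utility may differ:

  // Hypothetical reconstruction of the helper used above, for illustration only.
  public class TAccessLevelUtil {
    public static boolean impliesWriteAccess(TAccessLevel level) {
      // Only levels that permit writing imply write access.
      return level == TAccessLevel.READ_WRITE || level == TAccessLevel.WRITE_ONLY;
    }
  }
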
Example #3
  /**
   * Returns true if Impala has HDFS write permissions on the hdfsBaseDir (for an
   * unpartitioned table) or on all partition directories (for a partitioned table).
   */
  public boolean hasWriteAccess() {
    return TAccessLevelUtil.impliesWriteAccess(accessLevel_);
  }
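
The boolean check pairs naturally with Example #1: hasWriteAccess() is the cheap
gate, while getFirstLocationWithoutWriteAccess() pinpoints the offending path for
an error message. A hypothetical combination (the logging call is an assumption,
not taken from the original code):

  // Illustrative only: report the first unwritable path when the table-level
  // check fails.
  if (!table.hasWriteAccess()) {
    LOG.warn("Impala lacks write access to table data; first unwritable path: "
        + table.getFirstLocationWithoutWriteAccess());
  }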