예제 #1
0
  /**
   * Returns statistics on this table as a tabular result set. Used for the SHOW TABLE STATS
   * statement. The schema of the returned TResultSet is set inside this method.
   */
  public TResultSet getTableStats() {
    TResultSet result = new TResultSet();
    TResultSetMetadata resultSchema = new TResultSetMetadata();
    result.setSchema(resultSchema);
    for (int i = 0; i < numClusteringCols_; ++i) {
      // Add the partition-key values as strings for simplicity.
      Column partCol = colsByPos_.get(i);
      TColumn colDesc = new TColumn(partCol.getName(), partCol.getType().toThrift());
      resultSchema.addToColumns(colDesc);
    }
    resultSchema.addToColumns(new TColumn("#Rows", ColumnType.BIGINT.toThrift()));
    resultSchema.addToColumns(new TColumn("#Files", ColumnType.BIGINT.toThrift()));
    resultSchema.addToColumns(new TColumn("Size", ColumnType.STRING.toThrift()));
    resultSchema.addToColumns(new TColumn("Format", ColumnType.STRING.toThrift()));

    // Pretty print partitions and their stats.
    ArrayList<HdfsPartition> orderedPartitions = Lists.newArrayList(partitions_);
    Collections.sort(orderedPartitions);

    for (HdfsPartition p : orderedPartitions) {
      // Ignore dummy default partition.
      if (p.getId() == ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID) continue;
      TResultRowBuilder rowBuilder = new TResultRowBuilder();

      // Add the partition-key values (as strings for simplicity).
      for (LiteralExpr expr : p.getPartitionValues()) {
        rowBuilder.add(expr.getStringValue());
      }

      // Add number of rows, files, bytes and the file format.
      rowBuilder
          .add(p.getNumRows())
          .add(p.getFileDescriptors().size())
          .addBytes(p.getSize())
          .add(p.getInputFormatDescriptor().getFileFormat().toString());
      result.addToRows(rowBuilder.get());
    }

    // For partitioned tables add a summary row at the bottom.
    if (numClusteringCols_ > 0) {
      TResultRowBuilder rowBuilder = new TResultRowBuilder();
      int numEmptyCells = numClusteringCols_ - 1;
      rowBuilder.add("Total");
      for (int i = 0; i < numEmptyCells; ++i) {
        rowBuilder.add("");
      }

      // Total num rows, files, and bytes (leave format empty).
      rowBuilder.add(numRows_).add(numHdfsFiles_).addBytes(totalHdfsBytes_).add("");
      result.addToRows(rowBuilder.get());
    }
    return result;
  }
예제 #2
0
  /**
   * Create HdfsPartition objects corresponding to 'partitions'.
   *
   * <p>If there are no partitions in the Hive metadata, a single partition is added with no
   * partition keys.
   *
   * <p>For files that have not been changed, reuses file descriptors from oldFileDescMap.
   */
  private void loadPartitions(
      List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions,
      org.apache.hadoop.hive.metastore.api.Table msTbl,
      Map<String, FileDescriptor> oldFileDescMap)
      throws IOException, CatalogException {
    partitions_.clear();
    hdfsBaseDir_ = msTbl.getSd().getLocation();
    List<FileDescriptor> newFileDescs = Lists.newArrayList();

    // INSERT statements need to refer to this if they try to write to new partitions
    // Scans don't refer to this because by definition all partitions they refer to
    // exist.
    addDefaultPartition(msTbl.getSd());

    if (msTbl.getPartitionKeysSize() == 0) {
      Preconditions.checkArgument(msPartitions == null || msPartitions.isEmpty());
      // This table has no partition key, which means it has no declared partitions.
      // We model partitions slightly differently to Hive - every file must exist in a
      // partition, so add a single partition with no keys which will get all the
      // files in the table's root directory.
      addPartition(msTbl.getSd(), null, new ArrayList<LiteralExpr>(), oldFileDescMap, newFileDescs);
      Path location = new Path(hdfsBaseDir_);
      if (DFS.exists(location)) {
        accessLevel_ = getAvailableAccessLevel(location);
      }
    } else {
      // keep track of distinct partition key values and how many nulls there are
      Set<String>[] uniquePartitionKeys = new HashSet[numClusteringCols_];
      long[] numNullKeys = new long[numClusteringCols_];
      for (int i = 0; i < numClusteringCols_; ++i) {
        uniquePartitionKeys[i] = new HashSet<String>();
        numNullKeys[i] = 0;
      }

      for (org.apache.hadoop.hive.metastore.api.Partition msPartition : msPartitions) {
        // load key values
        List<LiteralExpr> keyValues = Lists.newArrayList();
        int i = 0;
        for (String partitionKey : msPartition.getValues()) {
          uniquePartitionKeys[i].add(partitionKey);
          // Deal with Hive's special NULL partition key.
          if (partitionKey.equals(nullPartitionKeyValue_)) {
            keyValues.add(new NullLiteral());
            ++numNullKeys[i];
          } else {
            ColumnType type = colsByPos_.get(keyValues.size()).getType();
            try {
              Expr expr = LiteralExpr.create(partitionKey, type);
              // Force the literal to be of type declared in the metadata.
              expr = expr.castTo(type);
              keyValues.add((LiteralExpr) expr);
            } catch (AnalysisException ex) {
              LOG.warn("Failed to create literal expression of type: " + type, ex);
              throw new InvalidStorageDescriptorException(ex);
            }
          }
          ++i;
        }
        HdfsPartition partition =
            addPartition(msPartition.getSd(), msPartition, keyValues, oldFileDescMap, newFileDescs);
        // If the partition is null, its HDFS path does not exist, and it was not added to
        // this table's partition list. Skip the partition.
        if (partition == null) continue;

        if (msPartition.getParameters() != null) {
          partition.setNumRows(getRowCount(msPartition.getParameters()));
        }
        if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
          // TODO: READ_ONLY isn't exactly correct because the it's possible the
          // partition does not have READ permissions either. When we start checking
          // whether we can READ from a table, this should be updated to set the
          // table's access level to the "lowest" effective level across all
          // partitions. That is, if one partition has READ_ONLY and another has
          // WRITE_ONLY the table's access level should be NONE.
          accessLevel_ = TAccessLevel.READ_ONLY;
        }
      }

      // update col stats for partition key cols
      for (int i = 0; i < numClusteringCols_; ++i) {
        ColumnStats stats = colsByPos_.get(i).getStats();
        stats.setNumNulls(numNullKeys[i]);
        stats.setNumDistinctValues(uniquePartitionKeys[i].size());
        LOG.debug("#col=" + Integer.toString(i) + " stats=" + stats.toString());
      }
    }

    if (newFileDescs.size() > 0) {
      loadBlockMd(newFileDescs);
    }
    uniqueHostPortsCount_ = countUniqueDataNetworkLocations(partitions_);
  }