Exemple #1
0
  /**
   * Returns statistics on this table as a tabular result set. Used for the SHOW TABLE STATS
   * statement. The schema of the returned TResultSet is set inside this method.
   */
  public TResultSet getTableStats() {
    TResultSet result = new TResultSet();
    TResultSetMetadata resultSchema = new TResultSetMetadata();
    result.setSchema(resultSchema);
    for (int i = 0; i < numClusteringCols_; ++i) {
      // Add the partition-key values as strings for simplicity.
      Column partCol = colsByPos_.get(i);
      TColumn colDesc = new TColumn(partCol.getName(), partCol.getType().toThrift());
      resultSchema.addToColumns(colDesc);
    }
    resultSchema.addToColumns(new TColumn("#Rows", ColumnType.BIGINT.toThrift()));
    resultSchema.addToColumns(new TColumn("#Files", ColumnType.BIGINT.toThrift()));
    resultSchema.addToColumns(new TColumn("Size", ColumnType.STRING.toThrift()));
    resultSchema.addToColumns(new TColumn("Format", ColumnType.STRING.toThrift()));

    // Pretty print partitions and their stats.
    ArrayList<HdfsPartition> orderedPartitions = Lists.newArrayList(partitions_);
    Collections.sort(orderedPartitions);

    for (HdfsPartition p : orderedPartitions) {
      // Ignore dummy default partition.
      if (p.getId() == ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID) continue;
      TResultRowBuilder rowBuilder = new TResultRowBuilder();

      // Add the partition-key values (as strings for simplicity).
      for (LiteralExpr expr : p.getPartitionValues()) {
        rowBuilder.add(expr.getStringValue());
      }

      // Add number of rows, files, bytes and the file format.
      rowBuilder
          .add(p.getNumRows())
          .add(p.getFileDescriptors().size())
          .addBytes(p.getSize())
          .add(p.getInputFormatDescriptor().getFileFormat().toString());
      result.addToRows(rowBuilder.get());
    }

    // For partitioned tables add a summary row at the bottom.
    if (numClusteringCols_ > 0) {
      TResultRowBuilder rowBuilder = new TResultRowBuilder();
      int numEmptyCells = numClusteringCols_ - 1;
      rowBuilder.add("Total");
      for (int i = 0; i < numEmptyCells; ++i) {
        rowBuilder.add("");
      }

      // Total num rows, files, and bytes (leave format empty).
      rowBuilder.add(numRows_).add(numHdfsFiles_).addBytes(totalHdfsBytes_).add("");
      result.addToRows(rowBuilder.get());
    }
    return result;
  }
Exemple #2
0
  /**
   * Returns statistics on this table as a tabular result set. Used for the SHOW TABLE STATS
   * statement. The schema of the returned TResultSet is set inside this method.
   */
  public TResultSet getTableStats() {
    TResultSet result = new TResultSet();
    TResultSetMetadata resultSchema = new TResultSetMetadata();
    result.setSchema(resultSchema);
    resultSchema.addToColumns(new TColumn("Region Location", ColumnType.STRING.toThrift()));
    resultSchema.addToColumns(new TColumn("Start RowKey", ColumnType.STRING.toThrift()));
    resultSchema.addToColumns(new TColumn("Est. #Rows", ColumnType.BIGINT.toThrift()));
    resultSchema.addToColumns(new TColumn("Size", ColumnType.STRING.toThrift()));

    // TODO: Consider fancier stats maintenance techniques for speeding up this process.
    // Currently, we list all regions and perform a mini-scan of each of them to
    // estimate the number of rows, the data size, etc., which is rather expensive.
    try {
      long totalNumRows = 0;
      long totalHdfsSize = 0;
      List<HRegionLocation> regions =
          HBaseTable.getRegionsInRange(
              hTable_, HConstants.EMPTY_END_ROW, HConstants.EMPTY_START_ROW);
      for (HRegionLocation region : regions) {
        TResultRowBuilder rowBuilder = new TResultRowBuilder();
        HRegionInfo regionInfo = region.getRegionInfo();
        Pair<Long, Long> estRowStats =
            getEstimatedRowStats(regionInfo.getStartKey(), regionInfo.getEndKey());

        long numRows = estRowStats.first.longValue();
        long hdfsSize = getHdfsSize(regionInfo);
        totalNumRows += numRows;
        totalHdfsSize += hdfsSize;

        // Add the region location, start rowkey, number of rows and raw Hdfs size.
        rowBuilder
            .add(String.valueOf(region.getHostname()))
            .add(Bytes.toString(regionInfo.getStartKey()))
            .add(numRows)
            .addBytes(hdfsSize);
        result.addToRows(rowBuilder.get());
      }

      // Total num rows and raw Hdfs size.
      if (regions.size() > 1) {
        TResultRowBuilder rowBuilder = new TResultRowBuilder();
        rowBuilder.add("Total").add("").add(totalNumRows).addBytes(totalHdfsSize);
        result.addToRows(rowBuilder.get());
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    return result;
  }