/**
 * Returns statistics on this table as a tabular result set. Used for the
 * SHOW TABLE STATS statement. The schema of the returned TResultSet is set
 * inside this method.
 */
public TResultSet getTableStats() {
  TResultSet result = new TResultSet();
  TResultSetMetadata resultSchema = new TResultSetMetadata();
  result.setSchema(resultSchema);
  resultSchema.addToColumns(
      new TColumn("Region Location", ColumnType.STRING.toThrift()));
  resultSchema.addToColumns(new TColumn("Start RowKey", ColumnType.STRING.toThrift()));
  resultSchema.addToColumns(new TColumn("Est. #Rows", ColumnType.BIGINT.toThrift()));
  resultSchema.addToColumns(new TColumn("Size", ColumnType.STRING.toThrift()));

  // TODO: Consider fancier stats maintenance techniques for speeding up this process.
  // Currently, we list all regions and perform a mini-scan of each of them to
  // estimate the number of rows, the data size, etc., which is rather expensive.
  try {
    long totalNumRows = 0;
    long totalHdfsSize = 0;
    List<HRegionLocation> regions = HBaseTable.getRegionsInRange(
        hTable_, HConstants.EMPTY_END_ROW, HConstants.EMPTY_START_ROW);
    for (HRegionLocation region : regions) {
      TResultRowBuilder rowBuilder = new TResultRowBuilder();
      HRegionInfo regionInfo = region.getRegionInfo();
      Pair<Long, Long> estRowStats =
          getEstimatedRowStats(regionInfo.getStartKey(), regionInfo.getEndKey());
      long numRows = estRowStats.first.longValue();
      long hdfsSize = getHdfsSize(regionInfo);
      totalNumRows += numRows;
      totalHdfsSize += hdfsSize;

      // Add the region location, start rowkey, number of rows and raw Hdfs size.
      rowBuilder.add(String.valueOf(region.getHostname()))
          .add(Bytes.toString(regionInfo.getStartKey()))
          .add(numRows)
          .addBytes(hdfsSize);
      result.addToRows(rowBuilder.get());
    }

    // Total num rows and raw Hdfs size.
    if (regions.size() > 1) {
      TResultRowBuilder rowBuilder = new TResultRowBuilder();
      rowBuilder.add("Total").add("").add(totalNumRows).addBytes(totalHdfsSize);
      result.addToRows(rowBuilder.get());
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  return result;
}
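// Usage sketch (illustrative only, not part of this class): a caller such as the
// frontend's SHOW TABLE STATS handler would forward the returned TResultSet to the
// client unchanged. Assuming the standard Thrift-generated accessors on TResultSet,
// TResultSetMetadata and TColumn (getSchema(), getColumns(), getColumnName(),
// getRowsSize() -- accessor names are assumptions), the result could be inspected
// like this, where 'table' is a hypothetical HBaseTable instance:
//
//   TResultSet stats = table.getTableStats();
//   for (TColumn col : stats.getSchema().getColumns()) {
//     System.out.print(col.getColumnName() + "\t");  // "Region Location", "Start RowKey", ...
//   }
//   System.out.println("\nRegions reported: " + stats.getRowsSize());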
/**
 * Returns statistics on this table as a tabular result set. Used for the
 * SHOW TABLE STATS statement. The schema of the returned TResultSet is set
 * inside this method.
 */
public TResultSet getTableStats() {
  TResultSet result = new TResultSet();
  TResultSetMetadata resultSchema = new TResultSetMetadata();
  result.setSchema(resultSchema);

  for (int i = 0; i < numClusteringCols_; ++i) {
    // Add the partition-key values as strings for simplicity.
    Column partCol = colsByPos_.get(i);
    TColumn colDesc = new TColumn(partCol.getName(), partCol.getType().toThrift());
    resultSchema.addToColumns(colDesc);
  }
  resultSchema.addToColumns(new TColumn("#Rows", ColumnType.BIGINT.toThrift()));
  resultSchema.addToColumns(new TColumn("#Files", ColumnType.BIGINT.toThrift()));
  resultSchema.addToColumns(new TColumn("Size", ColumnType.STRING.toThrift()));
  resultSchema.addToColumns(new TColumn("Format", ColumnType.STRING.toThrift()));

  // Pretty print partitions and their stats.
  ArrayList<HdfsPartition> orderedPartitions = Lists.newArrayList(partitions_);
  Collections.sort(orderedPartitions);

  for (HdfsPartition p : orderedPartitions) {
    // Ignore dummy default partition.
    if (p.getId() == ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID) continue;
    TResultRowBuilder rowBuilder = new TResultRowBuilder();

    // Add the partition-key values (as strings for simplicity).
    for (LiteralExpr expr : p.getPartitionValues()) {
      rowBuilder.add(expr.getStringValue());
    }

    // Add number of rows, files, bytes and the file format.
    rowBuilder.add(p.getNumRows())
        .add(p.getFileDescriptors().size())
        .addBytes(p.getSize())
        .add(p.getInputFormatDescriptor().getFileFormat().toString());
    result.addToRows(rowBuilder.get());
  }

  // For partitioned tables add a summary row at the bottom.
  if (numClusteringCols_ > 0) {
    TResultRowBuilder rowBuilder = new TResultRowBuilder();
    int numEmptyCells = numClusteringCols_ - 1;
    rowBuilder.add("Total");
    for (int i = 0; i < numEmptyCells; ++i) {
      rowBuilder.add("");
    }

    // Total num rows, files, and bytes (leave format empty).
    rowBuilder.add(numRows_).add(numHdfsFiles_).addBytes(totalHdfsBytes_).add("");
    result.addToRows(rowBuilder.get());
  }
  return result;
}
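// Layout sketch (illustrative only): the summary row pads numClusteringCols_ - 1 empty
// cells after "Total" so it stays aligned with the partition-key columns emitted for
// each partition. For a hypothetical table partitioned by (year, month), i.e.
// numClusteringCols_ == 2, the result set would be shaped roughly like this
// (all values below are made up for illustration):
//
//   year  month  #Rows  #Files  Size      Format
//   2013  11     1000   4       12.34MB   TEXT
//   2013  12     2000   8       24.68MB   TEXT
//   Total        3000   12      37.02MB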