/** * Verify that all THdfsPartitions included in the descriptor table are referenced by at least one * scan range or part of an inserted table. PrintScanRangeLocations will implicitly verify the * converse (it'll fail if a scan range references a table/partition descriptor that is not * present). */ private void testHdfsPartitionsReferenced( TQueryExecRequest execRequest, String query, StringBuilder errorLog) { long insertTableId = -1; // Collect all partitions that are referenced by a scan range. Set<THdfsPartition> scanRangePartitions = Sets.newHashSet(); if (execRequest.per_node_scan_ranges != null) { for (Map.Entry<Integer, List<TScanRangeLocations>> entry : execRequest.per_node_scan_ranges.entrySet()) { if (entry.getValue() == null) { continue; } for (TScanRangeLocations locations : entry.getValue()) { if (locations.scan_range.isSetHdfs_file_split()) { THdfsFileSplit split = locations.scan_range.getHdfs_file_split(); THdfsPartition partition = findPartition(entry.getKey(), split); scanRangePartitions.add(partition); } } } } if (execRequest.isSetFinalize_params()) { insertTableId = execRequest.getFinalize_params().getTable_id(); } boolean first = true; // Iterate through all partitions of the descriptor table and verify all partitions // are referenced. if (execRequest.isSetDesc_tbl() && execRequest.desc_tbl.isSetTableDescriptors()) { for (TTableDescriptor tableDesc : execRequest.desc_tbl.tableDescriptors) { // All partitions of insertTableId are okay. if (tableDesc.getId() == insertTableId) continue; if (!tableDesc.isSetHdfsTable()) continue; THdfsTable hdfsTable = tableDesc.getHdfsTable(); for (Map.Entry<Long, THdfsPartition> e : hdfsTable.getPartitions().entrySet()) { THdfsPartition partition = e.getValue(); if (!scanRangePartitions.contains(partition)) { if (first) errorLog.append("query:\n" + query + "\n"); errorLog.append( " unreferenced partition: HdfsTable: " + tableDesc.getId() + " HdfsPartition: " + partition.getId() + "\n"); first = false; } } } } }
/**
 * Assembles the Thrift THdfsTable for this table from its current in-memory state:
 * the base directory, the column names in positional order, the two null-value
 * markers, a partition-id to Thrift-partition map, and the Avro schema.
 *
 * @return a newly constructed THdfsTable
 */
private THdfsTable getHdfsTable() {
  // Convert every partition to its Thrift form, keyed by partition id.
  Map<Long, THdfsPartition> thriftPartitions = Maps.newHashMap();
  for (HdfsPartition part : partitions_) {
    Long partId = part.getId();
    thriftPartitions.put(partId, part.toThrift(true));
  }

  // Column names are emitted in the order given by colsByPos_.
  int numCols = colsByPos_.size();
  List<String> columnNames = new ArrayList<String>(numCols);
  for (int pos = 0; pos < numCols; pos++) {
    columnNames.add(colsByPos_.get(pos).getName());
  }

  THdfsTable result = new THdfsTable(
      hdfsBaseDir_, columnNames, nullPartitionKeyValue_, nullColumnValue_, thriftPartitions);
  // The Avro schema is not a constructor argument, so it is set separately.
  result.setAvroSchema(avroSchema_);
  return result;
}
/**
 * Resolves the partition that a file split belongs to, starting from the id of the plan
 * node that scans it. The lookup chain is: plan node -> HDFS scan node -> tuple
 * descriptor -> table descriptor -> HDFS table -> partition.
 *
 * Every hop is validated with Preconditions, so a missing map entry, an id mismatch, or a
 * node/table that is not HDFS-backed aborts with an exception rather than returning null.
 *
 * @param nodeId id of the plan node owning the scan range
 * @param split file split whose {@code partition_id} selects the partition
 * @return the matching partition; never null
 */
private THdfsPartition findPartition(int nodeId, THdfsFileSplit split) {
  // Plan node: must exist, carry the requested id, and be an HDFS scan node.
  TPlanNode planNode = planMap_.get(nodeId);
  Preconditions.checkNotNull(planNode);
  Preconditions.checkState(planNode.node_id == nodeId);
  Preconditions.checkState(planNode.isSetHdfs_scan_node());

  // Tuple descriptor scanned by that node.
  int tupleId = planNode.getHdfs_scan_node().getTuple_id();
  TTupleDescriptor tupleDescriptor = tupleMap_.get(tupleId);
  Preconditions.checkNotNull(tupleDescriptor);
  Preconditions.checkState(tupleDescriptor.id == tupleId);

  // Table descriptor backing the tuple: must be an HDFS table.
  TTableDescriptor tableDescriptor = tableMap_.get(tupleDescriptor.tableId);
  Preconditions.checkNotNull(tableDescriptor);
  Preconditions.checkState(tableDescriptor.id == tupleDescriptor.tableId);
  Preconditions.checkState(tableDescriptor.isSetHdfsTable());

  // Finally, the partition named by the split.
  THdfsPartition match =
      tableDescriptor.getHdfsTable().getPartitions().get(split.partition_id);
  Preconditions.checkNotNull(match);
  Preconditions.checkState(match.id == split.partition_id);
  return match;
}
/**
 * Populates this table from its Thrift representation. After delegating to the
 * superclass, copies the base directory and null-value markers from the embedded
 * THdfsTable, rebuilds the partitions from the Thrift partition map while tallying
 * file count and total size, recomputes the unique data-network-location count, and
 * picks up the Avro schema if one is set.
 *
 * NOTE(review): the file/byte counters are reset to zero here but partitions_ is only
 * appended to; this presumably relies on partitions_ being empty on entry - confirm.
 *
 * @param thriftTable Thrift table whose HDFS-specific section is read
 * @throws TableLoadingException declared by the overridden method
 */
@Override
protected void loadFromThrift(TTable thriftTable) throws TableLoadingException {
  super.loadFromThrift(thriftTable);
  THdfsTable thriftHdfsTable = thriftTable.getHdfs_table();

  // Scalar table properties.
  hdfsBaseDir_ = thriftHdfsTable.getHdfsBaseDir();
  nullColumnValue_ = thriftHdfsTable.nullColumnValue;
  nullPartitionKeyValue_ = thriftHdfsTable.nullPartitionKeyValue;

  // Rebuild partitions, accumulating aggregate file statistics as we go.
  numHdfsFiles_ = 0;
  totalHdfsBytes_ = 0;
  for (Map.Entry<Long, THdfsPartition> idAndPartition
      : thriftHdfsTable.getPartitions().entrySet()) {
    HdfsPartition loaded =
        HdfsPartition.fromThrift(this, idAndPartition.getKey(), idAndPartition.getValue());
    numHdfsFiles_ += loaded.getFileDescriptors().size();
    totalHdfsBytes_ += loaded.getSize();
    partitions_.add(loaded);
  }
  uniqueHostPortsCount_ = countUniqueDataNetworkLocations(partitions_);

  // An unset schema is represented as null on this side.
  if (thriftHdfsTable.isSetAvroSchema()) {
    avroSchema_ = thriftHdfsTable.getAvroSchema();
  } else {
    avroSchema_ = null;
  }
}