/**
 * Returns the number of distinct data node network addresses referenced by the file
 * blocks of all files in the given partitions.
 */
private static int countUniqueDataNetworkLocations(List<HdfsPartition> partitions) {
  Set<TNetworkAddress> distinctAddresses = Sets.newHashSet();
  for (HdfsPartition part : partitions) {
    for (FileDescriptor fd : part.getFileDescriptors()) {
      for (THdfsFileBlock block : fd.getFileBlocks()) {
        // The set drops duplicates, so every address is counted once.
        distinctAddresses.addAll(block.getNetwork_addresses());
      }
    }
  }
  return distinctAddresses.size();
}
/** Populate file block metadata inside each file descriptors. */ private void loadBlockMd(List<FileDescriptor> fileDescriptors) throws RuntimeException { LOG.debug("load block md for " + name_); // Block locations for all the files List<BlockLocation> blockLocations = Lists.newArrayList(); // loop over all files and record their block metadata, minus volume ids for (FileDescriptor fileDescriptor : fileDescriptors) { Path p = new Path(fileDescriptor.getFilePath()); BlockLocation[] locations = null; try { FileStatus fileStatus = DFS.getFileStatus(p); // fileDescriptors should not contain directories. Preconditions.checkArgument(!fileStatus.isDirectory()); locations = DFS.getFileBlockLocations(fileStatus, 0, fileStatus.getLen()); if (locations != null) { blockLocations.addAll(Arrays.asList(locations)); for (int i = 0; i < locations.length; ++i) { FileBlock blockMd = new FileBlock( fileDescriptor.getFilePath(), fileDescriptor.getFileLength(), locations[i]); fileDescriptor.addFileBlock(blockMd); } } } catch (IOException e) { throw new RuntimeException( "couldn't determine block locations for path '" + p + "':\n" + e.getMessage(), e); } } if (!SUPPORTS_VOLUME_ID) { return; } // BlockStorageLocations for all the blocks // block described by blockMetadataList[i] is located at locations[i] BlockStorageLocation[] locations = null; try { // Get the BlockStorageLocations for all the blocks locations = DFS.getFileBlockStorageLocations(blockLocations); } catch (IOException e) { LOG.error("Couldn't determine block storage locations:\n" + e.getMessage()); return; } if (locations == null || locations.length == 0) { LOG.warn("Attempted to get block locations but the call returned nulls"); return; } if (locations.length != blockLocations.size()) { // blocks and locations don't match up LOG.error( "Number of block locations not equal to number of blocks: " + "#locations=" + Long.toString(locations.length) + " #blocks=" + Long.toString(blockLocations.size())); return; } int 
locationsIdx = 0; int unknownDiskIdCount = 0; for (FileDescriptor fileDescriptor : fileDescriptors) { for (THdfsFileBlock blockMd : fileDescriptor.getFileBlocks()) { VolumeId[] volumeIds = locations[locationsIdx++].getVolumeIds(); // Convert opaque VolumeId to 0 based ids. // TODO: the diskId should be eventually retrievable from Hdfs when // the community agrees this API is useful. int[] diskIds = new int[volumeIds.length]; for (int i = 0; i < volumeIds.length; ++i) { diskIds[i] = getDiskId(volumeIds[i]); if (diskIds[i] < 0) ++unknownDiskIdCount; } FileBlock.setDiskIds(diskIds, blockMd); } } LOG.debug("loaded disk ids for table " + getFullName() + ". nodes: " + getNumNodes()); if (unknownDiskIdCount > 0) { LOG.warn("unknown disk id count " + unknownDiskIdCount); } }