/** Return scan ranges (hdfs splits) plus their storage locations, including volume ids. */ @Override public List<TScanRangeLocations> getScanRangeLocations(long maxScanRangeLength) { List<TScanRangeLocations> result = Lists.newArrayList(); List<HdfsTable.BlockMetadata> blockMetadata = HdfsTable.getBlockMetadata(partitions); for (HdfsTable.BlockMetadata block : blockMetadata) { // collect all locations for block String[] blockHostPorts = null; try { // Use getNames() to get port number as well blockHostPorts = block.getLocation().getNames(); // uncomment if you need to see detailed block locations // LOG.info(Arrays.toString(blockHostPorts)); } catch (IOException e) { // this shouldn't happen, getHosts() doesn't throw anything String errorMsg = "BlockLocation.getHosts() failed:\n" + e.getMessage(); LOG.error(errorMsg); throw new IllegalStateException(errorMsg); } if (blockHostPorts.length == 0) { // we didn't get locations for this block; for now, just ignore the block // TODO: do something meaningful with that continue; } // record host/ports and volume ids Preconditions.checkState(blockHostPorts.length > 0); List<TScanRangeLocation> locations = Lists.newArrayList(); for (int i = 0; i < blockHostPorts.length; ++i) { TScanRangeLocation location = new TScanRangeLocation(); String hostPort = blockHostPorts[i]; location.setServer(addressToTHostPort(hostPort)); location.setVolume_id(block.getVolumeId(i)); locations.add(location); } // create scan ranges, taking into account maxScanRangeLength BlockLocation blockLocation = block.getLocation(); long currentOffset = blockLocation.getOffset(); long remainingLength = blockLocation.getLength(); while (remainingLength > 0) { long currentLength = remainingLength; if (maxScanRangeLength > 0 && remainingLength > maxScanRangeLength) { currentLength = maxScanRangeLength; } TScanRange scanRange = new TScanRange(); scanRange.setHdfs_file_split( new THdfsFileSplit( block.getFileName(), currentOffset, currentLength, block.getPartition().getId())); TScanRangeLocations scanRangeLocations = new TScanRangeLocations(); scanRangeLocations.scan_range = scanRange; scanRangeLocations.locations = locations; result.add(scanRangeLocations); remainingLength -= currentLength; currentOffset += currentLength; } } return result; }