/** * Fix offset and length of block locations. Note that this method modifies the original array. * * @param locations block locations of har part file * @param start the start of the desired range in the contained file * @param len the length of the desired range * @param fileOffsetInHar the offset of the desired file in the har part file * @return block locations with fixed offset and length */ static BlockLocation[] fixBlockLocations( BlockLocation[] locations, long start, long len, long fileOffsetInHar) { // offset 1 past last byte of desired range long end = start + len; for (BlockLocation location : locations) { // offset of part block relative to beginning of desired file // (may be negative if file starts in this part block) long harBlockStart = location.getOffset() - fileOffsetInHar; // offset 1 past last byte of har block relative to beginning of // desired file long harBlockEnd = harBlockStart + location.getLength(); if (start > harBlockStart) { // desired range starts after beginning of this har block // fix offset to beginning of relevant range (relative to desired file) location.setOffset(start); // fix length to relevant portion of har block location.setLength(location.getLength() - (start - harBlockStart)); } else { // desired range includes beginning of this har block location.setOffset(harBlockStart); } if (harBlockEnd > end) { // range ends before end of this har block // fix length to remove irrelevant portion at the end location.setLength(location.getLength() - (harBlockEnd - end)); } } return locations; }
/** Return scan ranges (hdfs splits) plus their storage locations, including volume ids. */ @Override public List<TScanRangeLocations> getScanRangeLocations(long maxScanRangeLength) { List<TScanRangeLocations> result = Lists.newArrayList(); List<HdfsTable.BlockMetadata> blockMetadata = HdfsTable.getBlockMetadata(partitions); for (HdfsTable.BlockMetadata block : blockMetadata) { // collect all locations for block String[] blockHostPorts = null; try { // Use getNames() to get port number as well blockHostPorts = block.getLocation().getNames(); // uncomment if you need to see detailed block locations // LOG.info(Arrays.toString(blockHostPorts)); } catch (IOException e) { // this shouldn't happen, getHosts() doesn't throw anything String errorMsg = "BlockLocation.getHosts() failed:\n" + e.getMessage(); LOG.error(errorMsg); throw new IllegalStateException(errorMsg); } if (blockHostPorts.length == 0) { // we didn't get locations for this block; for now, just ignore the block // TODO: do something meaningful with that continue; } // record host/ports and volume ids Preconditions.checkState(blockHostPorts.length > 0); List<TScanRangeLocation> locations = Lists.newArrayList(); for (int i = 0; i < blockHostPorts.length; ++i) { TScanRangeLocation location = new TScanRangeLocation(); String hostPort = blockHostPorts[i]; location.setServer(addressToTHostPort(hostPort)); location.setVolume_id(block.getVolumeId(i)); locations.add(location); } // create scan ranges, taking into account maxScanRangeLength BlockLocation blockLocation = block.getLocation(); long currentOffset = blockLocation.getOffset(); long remainingLength = blockLocation.getLength(); while (remainingLength > 0) { long currentLength = remainingLength; if (maxScanRangeLength > 0 && remainingLength > maxScanRangeLength) { currentLength = maxScanRangeLength; } TScanRange scanRange = new TScanRange(); scanRange.setHdfs_file_split( new THdfsFileSplit( block.getFileName(), currentOffset, currentLength, 
block.getPartition().getId())); TScanRangeLocations scanRangeLocations = new TScanRangeLocations(); scanRangeLocations.scan_range = scanRange; scanRangeLocations.locations = locations; result.add(scanRangeLocations); remainingLength -= currentLength; currentOffset += currentLength; } } return result; }
/**
 * Creates a FileFragment covering the extent of a single HDFS block.
 *
 * <p>Delegates to the main constructor with the block's offset, length, and replica hosts;
 * the final argument is passed as null (its meaning is defined by the delegated constructor,
 * which is outside this view — confirm there).
 *
 * @param tableName name of the table this fragment belongs to
 * @param uri path of the underlying file
 * @param blockLocation HDFS block whose offset/length/hosts define this fragment
 * @throws IOException if {@code blockLocation.getHosts()} fails
 */
public FileFragment(String tableName, Path uri, BlockLocation blockLocation) throws IOException {
  this(
      tableName,
      uri,
      blockLocation.getOffset(),
      blockLocation.getLength(),
      blockLocation.getHosts(),
      null);
}
/**
 * Builds a sorted map from block start offset to block location for the given file.
 *
 * @param fs filesystem used to look up block locations
 * @param status file whose block locations are mapped
 * @return TreeMap keyed by each block's starting byte offset
 * @throws IOException if the block locations cannot be retrieved
 */
@Override
public TreeMap<Long, BlockLocation> getLocationsWithOffset(FileSystem fs, FileStatus status)
    throws IOException {
  TreeMap<Long, BlockLocation> byOffset = new TreeMap<Long, BlockLocation>();
  BlockLocation[] blocks = getLocations(fs, status);
  for (int i = 0; i < blocks.length; i++) {
    byOffset.put(blocks[i].getOffset(), blocks[i]);
  }
  return byOffset;
}
protected int getBlockIndex(BlockLocation[] blkLocations, long offset) { for (int i = 0; i < blkLocations.length; i++) { // is the offset inside this block? if ((blkLocations[i].getOffset() <= offset) && (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) { return i; } } BlockLocation last = blkLocations[blkLocations.length - 1]; long fileLength = last.getOffset() + last.getLength() - 1; throw new IllegalArgumentException( "Offset " + offset + " is outside of file (0.." + fileLength + ")"); }
/** * Helper function to add an ArraySpec to a HashMap that stores ArraySpec -> BlockLocation * mappings * * @param blockToAS HashMap that stores the mappings being added to * @param offset The offset, in bytes, in the file that this ArraySpec starts at * @param as The ArraySpec to add to the Map */ public static void insertNewAs( HashMap<BlockLocation, ArrayList<ArraySpec>> blockToAS, long offset, ArraySpec as) { // search for the correct BlockLocation // (TODO this is inefficient, fix it) Iterator iter = blockToAS.keySet().iterator(); while (iter.hasNext()) { BlockLocation tempKey = (BlockLocation) (iter.next()); if (tempKey.getOffset() == offset) { (blockToAS.get(tempKey)).add(as); } } }