/** Test conversion of LocatedBlock to BlockLocation */
@Test
public void testLocatedBlocks2Locations() {
  DatanodeInfo d = DFSTestUtil.getLocalDatanodeInfo();
  DatanodeInfo[] ds = new DatanodeInfo[1];
  ds[0] = d;

  // ok
  ExtendedBlock b1 = new ExtendedBlock("bpid", 1, 1, 1);
  LocatedBlock l1 = new LocatedBlock(b1, ds, 0, false);

  // corrupt
  ExtendedBlock b2 = new ExtendedBlock("bpid", 2, 1, 1);
  LocatedBlock l2 = new LocatedBlock(b2, ds, 0, true);

  List<LocatedBlock> ls = Arrays.asList(l1, l2);
  LocatedBlocks lbs = new LocatedBlocks(10, false, ls, l2, true, null);

  BlockLocation[] bs = DFSUtil.locatedBlocks2Locations(lbs);

  assertTrue("expected 2 blocks but got " + bs.length, bs.length == 2);

  int corruptCount = 0;
  for (BlockLocation b : bs) {
    if (b.isCorrupt()) {
      corruptCount++;
    }
  }

  assertTrue("expected 1 corrupt block but got " + corruptCount, corruptCount == 1);

  // test an empty location
  bs = DFSUtil.locatedBlocks2Locations(new LocatedBlocks());
  assertEquals(0, bs.length);
}
/**
 * Tallies, for the given host, how many blocks back the given files in total and how many of
 * those blocks have a replica on that host, accumulating the counts into the two maps.
 */
private void addBlocks(
    VolumeManager fs,
    String host,
    ArrayList<String> files,
    Map<String, Long> totalBlocks,
    Map<String, Long> localBlocks)
    throws Exception {
  long allBlocks = 0;
  long matchingBlocks = 0;
  if (!totalBlocks.containsKey(host)) {
    totalBlocks.put(host, 0L);
    localBlocks.put(host, 0L);
  }
  for (String file : files) {
    Path filePath = new Path(file);
    FileSystem ns = fs.getFileSystemByPath(filePath);
    FileStatus fileStatus = ns.getFileStatus(filePath);
    BlockLocation[] fileBlockLocations =
        ns.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
    for (BlockLocation blockLocation : fileBlockLocations) {
      allBlocks++;
      for (String location : blockLocation.getHosts()) {
        HostAndPort hap = HostAndPort.fromParts(location, 0);
        if (hap.getHostText().equals(host)) {
          matchingBlocks++;
          break;
        }
      }
    }
  }
  totalBlocks.put(host, allBlocks + totalBlocks.get(host));
  localBlocks.put(host, matchingBlocks + localBlocks.get(host));
}
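The two maps make it straightforward to report per-host data locality. A minimal hedged usage sketch; the tabletServerHosts list and filesOnHost(...) helper are hypothetical stand-ins, not part of the original code:

// Hedged sketch: compute the fraction of blocks local to each host.
Map<String, Long> totalBlocks = new HashMap<>();
Map<String, Long> localBlocks = new HashMap<>();
for (String host : tabletServerHosts) { // hypothetical host list
  addBlocks(fs, host, filesOnHost(host), totalBlocks, localBlocks); // filesOnHost is assumed
}
for (String host : totalBlocks.keySet()) {
  long total = totalBlocks.get(host);
  double pct = total == 0 ? 0.0 : 100.0 * localBlocks.get(host) / total;
  System.out.printf("%s: %.1f%% local%n", host, pct);
}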
/** Return scan ranges (hdfs splits) plus their storage locations, including volume ids. */
@Override
public List<TScanRangeLocations> getScanRangeLocations(long maxScanRangeLength) {
  List<TScanRangeLocations> result = Lists.newArrayList();
  List<HdfsTable.BlockMetadata> blockMetadata = HdfsTable.getBlockMetadata(partitions);
  for (HdfsTable.BlockMetadata block : blockMetadata) {
    // collect all locations for block
    String[] blockHostPorts = null;
    try {
      // Use getNames() to get the port number as well
      blockHostPorts = block.getLocation().getNames();
      // uncomment if you need to see detailed block locations
      // LOG.info(Arrays.toString(blockHostPorts));
    } catch (IOException e) {
      // this shouldn't happen, getNames() doesn't throw anything
      String errorMsg = "BlockLocation.getNames() failed:\n" + e.getMessage();
      LOG.error(errorMsg);
      throw new IllegalStateException(errorMsg);
    }

    if (blockHostPorts.length == 0) {
      // we didn't get locations for this block; for now, just ignore the block
      // TODO: do something meaningful with that
      continue;
    }

    // record host/ports and volume ids
    Preconditions.checkState(blockHostPorts.length > 0);
    List<TScanRangeLocation> locations = Lists.newArrayList();
    for (int i = 0; i < blockHostPorts.length; ++i) {
      TScanRangeLocation location = new TScanRangeLocation();
      String hostPort = blockHostPorts[i];
      location.setServer(addressToTHostPort(hostPort));
      location.setVolume_id(block.getVolumeId(i));
      locations.add(location);
    }

    // create scan ranges, taking into account maxScanRangeLength
    BlockLocation blockLocation = block.getLocation();
    long currentOffset = blockLocation.getOffset();
    long remainingLength = blockLocation.getLength();
    while (remainingLength > 0) {
      long currentLength = remainingLength;
      if (maxScanRangeLength > 0 && remainingLength > maxScanRangeLength) {
        currentLength = maxScanRangeLength;
      }
      TScanRange scanRange = new TScanRange();
      scanRange.setHdfs_file_split(
          new THdfsFileSplit(
              block.getFileName(), currentOffset, currentLength, block.getPartition().getId()));
      TScanRangeLocations scanRangeLocations = new TScanRangeLocations();
      scanRangeLocations.scan_range = scanRange;
      scanRangeLocations.locations = locations;
      result.add(scanRangeLocations);
      remainingLength -= currentLength;
      currentOffset += currentLength;
    }
  }
  return result;
}
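The while loop above carves each block into scan ranges of at most maxScanRangeLength bytes (a value <= 0 disables splitting). A minimal standalone sketch of just that arithmetic, with hypothetical names; a 300-byte block with maxScanRangeLength = 128 yields lengths 128, 128, 44:

// Hedged, standalone sketch of the chunking loop above.
static List<Long> chunkLengths(long blockLength, long maxScanRangeLength) {
  List<Long> lengths = new ArrayList<>();
  long remaining = blockLength;
  while (remaining > 0) {
    long current =
        (maxScanRangeLength > 0 && remaining > maxScanRangeLength)
            ? maxScanRangeLength
            : remaining;
    lengths.add(current);
    remaining -= current;
  }
  return lengths;
}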
public FileFragment(String tableName, Path uri, BlockLocation blockLocation) throws IOException {
  this(
      tableName,
      uri,
      blockLocation.getOffset(),
      blockLocation.getLength(),
      blockLocation.getHosts(),
      null);
}
@Override
public TreeMap<Long, BlockLocation> getLocationsWithOffset(FileSystem fs, FileStatus status)
    throws IOException {
  TreeMap<Long, BlockLocation> offsetBlockMap = new TreeMap<Long, BlockLocation>();
  BlockLocation[] locations = getLocations(fs, status);
  for (BlockLocation location : locations) {
    offsetBlockMap.put(location.getOffset(), location);
  }
  return offsetBlockMap;
}
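Keying the TreeMap by block offset is what enables cheap containment lookups via floorEntry. A hedged usage sketch; the fs/status variables and the target byte position are assumptions:

// Hedged sketch: find the block containing byte position `pos` in O(log n).
TreeMap<Long, BlockLocation> byOffset = getLocationsWithOffset(fs, status);
long pos = 150L * 1024 * 1024; // hypothetical byte offset
Map.Entry<Long, BlockLocation> e = byOffset.floorEntry(pos);
if (e != null && pos < e.getValue().getOffset() + e.getValue().getLength()) {
  System.out.println("hosts: " + Arrays.toString(e.getValue().getHosts()));
}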
/**
 * Fix offset and length of block locations. Note that this method modifies the original array.
 *
 * @param locations block locations of har part file
 * @param start the start of the desired range in the contained file
 * @param len the length of the desired range
 * @param fileOffsetInHar the offset of the desired file in the har part file
 * @return block locations with fixed offset and length
 */
static BlockLocation[] fixBlockLocations(
    BlockLocation[] locations, long start, long len, long fileOffsetInHar) {
  // offset 1 past last byte of desired range
  long end = start + len;

  for (BlockLocation location : locations) {
    // offset of part block relative to beginning of desired file
    // (may be negative if file starts in this part block)
    long harBlockStart = location.getOffset() - fileOffsetInHar;
    // offset 1 past last byte of har block relative to beginning of
    // desired file
    long harBlockEnd = harBlockStart + location.getLength();

    if (start > harBlockStart) {
      // desired range starts after beginning of this har block
      // fix offset to beginning of relevant range (relative to desired file)
      location.setOffset(start);
      // fix length to relevant portion of har block
      location.setLength(location.getLength() - (start - harBlockStart));
    } else {
      // desired range includes beginning of this har block
      location.setOffset(harBlockStart);
    }

    if (harBlockEnd > end) {
      // range ends before end of this har block
      // fix length to remove irrelevant portion at the end
      location.setLength(location.getLength() - (harBlockEnd - end));
    }
  }
  return locations;
}
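A worked example helps sanity-check the offset arithmetic. Suppose one 1024-byte part-file block holds a contained file that begins 300 bytes in (fileOffsetInHar = 300), and the caller wants bytes 0..499 of the contained file. A hedged sketch using the standard BlockLocation(names, hosts, offset, length) constructor; the datanode names are invented:

// Part-file block covers [0, 1024); relative to the contained file it spans
// [-300, 724), so the desired range [0, 500) falls entirely inside it.
BlockLocation[] locs = {
  new BlockLocation(new String[] {"dn1:50010"}, new String[] {"dn1"}, 0, 1024)
};
BlockLocation[] fixed = fixBlockLocations(locs, 0, 500, 300);
// First branch: offset 0, length 1024 - 300 = 724; then the tail past byte
// 500 is trimmed, leaving offset 0 and length 500.
assert fixed[0].getOffset() == 0;
assert fixed[0].getLength() == 500;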
protected int getBlockIndex(BlockLocation[] blkLocations, long offset) {
  for (int i = 0; i < blkLocations.length; i++) {
    // is the offset inside this block?
    if ((blkLocations[i].getOffset() <= offset)
        && (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) {
      return i;
    }
  }
  BlockLocation last = blkLocations[blkLocations.length - 1];
  long fileLength = last.getOffset() + last.getLength() - 1;
  throw new IllegalArgumentException(
      "Offset " + offset + " is outside of file (0.." + fileLength + ")");
}
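This containment scan is typically used to map a split's start offset back to its block so the split can be scheduled near that block's replicas. A hedged usage sketch; fs, status, and splitStart are assumptions, not part of the original snippet:

// Hedged sketch: find the block holding splitStart, then read off its hosts.
BlockLocation[] blks = fs.getFileBlockLocations(status, 0, status.getLen());
int idx = getBlockIndex(blks, splitStart);
String[] preferredHosts = blks[idx].getHosts();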
/**
 * Helper function to add an ArraySpec to a HashMap that stores
 * BlockLocation -> ArraySpec mappings
 *
 * @param blockToAS HashMap that stores the mappings being added to
 * @param offset The offset, in bytes, in the file that this ArraySpec starts at
 * @param as The ArraySpec to add to the Map
 */
public static void insertNewAs(
    HashMap<BlockLocation, ArrayList<ArraySpec>> blockToAS, long offset, ArraySpec as) {
  // search for the BlockLocation whose offset matches
  // (TODO this linear scan is inefficient, fix it)
  for (BlockLocation tempKey : blockToAS.keySet()) {
    if (tempKey.getOffset() == offset) {
      blockToAS.get(tempKey).add(as);
      // block offsets are unique within a file, so stop at the first match
      break;
    }
  }
}
@SuppressWarnings("unchecked") @Override /** * Splits the input collection into sets of files where each Map task gets about the same number * of files */ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { Path[] paths = FileInputFormat.getInputPaths(job); // HADOOP-1818: Manage splits only if there are paths if (paths.length == 0) { return new InputSplit[0]; } if (numSplits > paths.length) { numSplits = paths.length; } else if (numSplits < 1) { numSplits = 1; } logger.info("Allocating " + paths.length + " files across " + numSplits + " map tasks"); List<PositionAwareSplit<CombineFileSplit>> splits = new ArrayList<PositionAwareSplit<CombineFileSplit>>(numSplits); final int numPaths = paths.length; long[] lengths = new long[numPaths]; TObjectLongHashMap<String>[] locations = (TObjectLongHashMap<String>[]) Array.newInstance(TObjectLongHashMap.class, numPaths); final FileSystem fs = FileSystem.get(job); for (int i = 0; i < paths.length; i++) { final FileStatus fss = fs.getFileStatus(paths[i]); lengths[i] = fss.getLen(); final TObjectLongHashMap<String> location2size = locations[i] = new TObjectLongHashMap<String>(); final long normalblocksize = fss.getBlockSize(); for (long offset = 0; offset < lengths[i]; offset += normalblocksize) { final long blocksize = Math.min(offset + normalblocksize, lengths[i]); final BlockLocation[] blockLocations = fs.getFileBlockLocations(fss, offset, blocksize); for (BlockLocation bl : blockLocations) { for (String host : bl.getHosts()) { location2size.adjustOrPutValue(host, blocksize, blocksize); } } } } // we need to over-estimate using ceil, to ensure that the last split is not /too/ big final int numberOfFilesPerSplit = (int) Math.ceil((double) paths.length / (double) numSplits); int pathsUsed = 0; int splitnum = 0; CombineFileSplit mfs; // for each split except the last one (which may be smaller than numberOfFilesPerSplit) while (pathsUsed < numPaths) { /* caclulate split size for this task - usually numberOfFilesPerSplit, but * less than this for the last split */ final int splitSizeForThisSplit = numberOfFilesPerSplit + pathsUsed > numPaths ? numPaths - pathsUsed : numberOfFilesPerSplit; // arrays of information for split Path[] splitPaths = new Path[splitSizeForThisSplit]; long[] splitLengths = new long[splitSizeForThisSplit]; long[] splitStarts = new long[splitSizeForThisSplit]; final TObjectLongHashMap<String> allLocationsForSplit = new TObjectLongHashMap<String>(); String[] splitLocations = null; // final recommended locations for this split. 
for (int i = 0; i < splitSizeForThisSplit; i++) { locations[pathsUsed + i].forEachEntry( new TObjectLongProcedure<String>() { public boolean execute(String a, long b) { allLocationsForSplit.adjustOrPutValue(a, b, b); return true; } }); if (allLocationsForSplit.size() <= 3) { splitLocations = allLocationsForSplit.keys(new String[allLocationsForSplit.size()]); } else { String[] hosts = allLocationsForSplit.keys(new String[allLocationsForSplit.size()]); Arrays.sort( hosts, new Comparator<String>() { public int compare(String o1, String o2) { long diffamount = allLocationsForSplit.get(o1) - allLocationsForSplit.get(o2); if (diffamount > 0) { return -1; } else if (diffamount < 0) { return 1; } return 0; } }); splitLocations = new String[3]; System.arraycopy(hosts, 0, splitLocations, 0, 3); } } // copy information for this split System.arraycopy(lengths, pathsUsed, splitLengths, 0, splitSizeForThisSplit); System.arraycopy(paths, pathsUsed, splitPaths, 0, splitSizeForThisSplit); // count the number of paths consumed pathsUsed += splitSizeForThisSplit; // make the actual split object // logger.info("New split of size " + splitSizeForThisSplit); mfs = new CombineFileSplit(job, splitPaths, splitStarts, splitLengths, splitLocations); splits.add(new PositionAwareSplit<CombineFileSplit>(mfs, splitnum)); splitnum++; } if (!(pathsUsed == paths.length)) { throw new IOException("Number of used paths does not equal total available paths!"); } return splits.toArray(new PositionAwareSplit[splits.size()]); }