/**
 * Sort-based shuffle data uses an index called "shuffle_ShuffleId_MapId_0.index" into a data
 * file called "shuffle_ShuffleId_MapId_0.data". This logic is from IndexShuffleBlockResolver,
 * and the block id format is from ShuffleDataBlockId and ShuffleIndexBlockId.
 */
private ManagedBuffer getSortBasedShuffleBlockData(
    ExecutorShuffleInfo executor, int shuffleId, int mapId, int reduceId) {
  File indexFile = getFile(executor.localDirs, executor.subDirsPerLocalDir,
    "shuffle_" + shuffleId + "_" + mapId + "_0.index");

  DataInputStream in = null;
  try {
    in = new DataInputStream(new FileInputStream(indexFile));
    // The index file is a sequence of 8-byte longs; entry reduceId holds the start offset
    // of that partition in the data file, and the following entry holds its end offset.
    in.skipBytes(reduceId * 8);
    long offset = in.readLong();
    long nextOffset = in.readLong();
    return new FileSegmentManagedBuffer(
      conf,
      getFile(executor.localDirs, executor.subDirsPerLocalDir,
        "shuffle_" + shuffleId + "_" + mapId + "_0.data"),
      offset,
      nextOffset - offset);
  } catch (IOException e) {
    throw new RuntimeException("Failed to open file: " + indexFile, e);
  } finally {
    if (in != null) {
      JavaUtils.closeQuietly(in);
    }
  }
}
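/*
 * A minimal sketch (not part of this class) of the index-file layout the method above relies
 * on: the writer emits numPartitions + 1 longs, where entry r is the cumulative byte offset
 * at which reduce partition r begins in the data file, so partition r spans
 * [offsets[r], offsets[r + 1]). The helper name below is hypothetical and assumes
 * java.io.DataOutputStream and java.io.FileOutputStream are imported.
 */
private static void writeIndexFileSketch(File indexFile, long[] partitionLengths)
    throws IOException {
  DataOutputStream out = new DataOutputStream(new FileOutputStream(indexFile));
  try {
    long offset = 0L;
    out.writeLong(offset); // partition 0 always starts at byte 0
    for (long length : partitionLengths) {
      offset += length;
      out.writeLong(offset); // cumulative end offset doubles as the next partition's start
    }
  } finally {
    JavaUtils.closeQuietly(out);
  }
}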
/**
 * Hashes a filename into the corresponding local directory, in a manner consistent with
 * Spark's DiskBlockManager.getFile().
 */
@VisibleForTesting
static File getFile(String[] localDirs, int subDirsPerLocalDir, String filename) {
  int hash = JavaUtils.nonNegativeHash(filename);
  String localDir = localDirs[hash % localDirs.length];
  int subDirId = (hash / localDirs.length) % subDirsPerLocalDir;
  return new File(new File(localDir, String.format("%02x", subDirId)), filename);
}
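/*
 * Illustrative only: a quick exercise of the mapping above. With two local dirs and 64
 * subdirectories, a filename whose nonNegativeHash is 131 lands in localDirs[131 % 2] = 1
 * and subdirectory (131 / 2) % 64 = 1, i.e. <localDir2>/01/<filename>. The method name and
 * paths here are hypothetical.
 */
static void getFileExampleSketch() {
  String[] localDirs = {"/tmp/spark-local-1", "/tmp/spark-local-2"};
  // Resolves to a path of the form /tmp/spark-local-<d>/<xx>/shuffle_0_5_0.index,
  // where <d> and <xx> depend only on the filename's hash.
  File resolved = getFile(localDirs, 64, "shuffle_0_5_0.index");
  System.out.println(resolved);
}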
/**
 * Synchronously deletes each directory one at a time. Should be executed in its own thread,
 * as this may take a long time.
 */
private void deleteExecutorDirs(String[] dirs) {
  for (String localDir : dirs) {
    try {
      JavaUtils.deleteRecursively(new File(localDir));
      logger.debug("Successfully cleaned up directory: " + localDir);
    } catch (Exception e) {
      logger.error("Failed to delete directory: " + localDir, e);
    }
  }
}
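/*
 * Hypothetical usage sketch: because deleteExecutorDirs blocks until every directory is
 * removed, a caller would hand it off to a dedicated executor rather than run it inline.
 * The field and method names below are made up for illustration and assume
 * java.util.concurrent.Executor and java.util.concurrent.Executors are imported.
 */
private final Executor directoryCleanerSketch = Executors.newSingleThreadExecutor();

private void cleanupExecutorDirsAsyncSketch(final String[] dirs) {
  directoryCleanerSketch.execute(new Runnable() {
    @Override
    public void run() {
      deleteExecutorDirs(dirs);
    }
  });
}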