private void addBlocks(VolumeManager fs, String host, ArrayList<String> files,
    Map<String,Long> totalBlocks, Map<String,Long> localBlocks) throws Exception {
  long allBlocks = 0;
  long matchingBlocks = 0;
  if (!totalBlocks.containsKey(host)) {
    totalBlocks.put(host, 0L);
    localBlocks.put(host, 0L);
  }
  for (String file : files) {
    Path filePath = new Path(file);
    FileSystem ns = fs.getFileSystemByPath(filePath);
    FileStatus fileStatus = ns.getFileStatus(filePath);
    BlockLocation[] fileBlockLocations =
        ns.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
    for (BlockLocation blockLocation : fileBlockLocations) {
      allBlocks++;
      for (String location : blockLocation.getHosts()) {
        HostAndPort hap = HostAndPort.fromParts(location, 0);
        if (hap.getHostText().equals(host)) {
          matchingBlocks++;
          break;
        }
      }
    }
  }
  totalBlocks.put(host, allBlocks + totalBlocks.get(host));
  localBlocks.put(host, matchingBlocks + localBlocks.get(host));
}
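// Hedged usage sketch, not part of the original source: assuming the totalBlocks/localBlocks
// maps have been populated by addBlocks above, a caller could summarize per-host block locality
// like this. printLocalitySummary is a hypothetical helper introduced only for illustration.
private static void printLocalitySummary(Map<String,Long> totalBlocks, Map<String,Long> localBlocks) {
  for (Map.Entry<String,Long> entry : totalBlocks.entrySet()) {
    String host = entry.getKey();
    long total = entry.getValue();
    long local = localBlocks.getOrDefault(host, 0L);
    double pct = total == 0 ? 0.0 : 100.0 * local / total;
    // report how many of the host's blocks are stored locally
    System.out.printf("%s: %d/%d blocks local (%.1f%%)%n", host, local, total, pct);
  }
}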
OneFileInfo(Path path, Configuration conf,
    HashMap<String, List<OneBlockInfo>> rackToBlocks,
    HashMap<OneBlockInfo, String[]> blockToNodes,
    HashMap<String, List<OneBlockInfo>> nodeToBlocks) throws IOException {
  this.fileSize = 0;

  // get block locations from the file system
  FileSystem fs = path.getFileSystem(conf);
  FileStatus stat = fs.getFileStatus(path);
  BlockLocation[] locations = fs.getFileBlockLocations(stat, 0, stat.getLen());

  // create a list of all blocks and their locations
  if (locations == null) {
    blocks = new OneBlockInfo[0];
  } else {
    blocks = new OneBlockInfo[locations.length];
    for (int i = 0; i < locations.length; i++) {
      fileSize += locations[i].getLength();
      OneBlockInfo oneblock = new OneBlockInfo(path, locations[i].getOffset(),
          locations[i].getLength(), locations[i].getHosts(), locations[i].getTopologyPaths());
      blocks[i] = oneblock;

      // add this block to the block --> node locations map
      blockToNodes.put(oneblock, oneblock.hosts);

      // add this block to the rack --> block map
      for (int j = 0; j < oneblock.racks.length; j++) {
        String rack = oneblock.racks[j];
        List<OneBlockInfo> blklist = rackToBlocks.get(rack);
        if (blklist == null) {
          blklist = new ArrayList<OneBlockInfo>();
          rackToBlocks.put(rack, blklist);
        }
        blklist.add(oneblock);
        // add this host to the rackToNodes map
        addHostToRack(oneblock.racks[j], oneblock.hosts[j]);
      }

      // add this block to the node --> block map
      for (int j = 0; j < oneblock.hosts.length; j++) {
        String node = oneblock.hosts[j];
        List<OneBlockInfo> blklist = nodeToBlocks.get(node);
        if (blklist == null) {
          blklist = new ArrayList<OneBlockInfo>();
          nodeToBlocks.put(node, blklist);
        }
        blklist.add(oneblock);
      }
    }
  }
}
@Override
public BlockLocation[] getLocations(FileSystem fs, FileStatus status) throws IOException {
  if (status instanceof LocatedFileStatus) {
    // LocatedFileStatus already carries block locations; reuse them instead of asking again.
    return ((LocatedFileStatus) status).getBlockLocations();
  } else {
    return fs.getFileBlockLocations(status, 0, status.getLen());
  }
}
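// Hedged sketch, not from the original source: a caller that benefits from the LocatedFileStatus
// fast path above. FileSystem.listLocatedStatus returns statuses that already carry block
// locations, so no extra getFileBlockLocations call is needed per file. Assumes the usual
// org.apache.hadoop.fs imports; the directory argument is a hypothetical example.
static void printBlockHosts(FileSystem fs, Path dir) throws IOException {
  RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(dir);
  while (it.hasNext()) {
    LocatedFileStatus status = it.next();
    // block locations came back with the listing itself
    for (BlockLocation loc : status.getBlockLocations()) {
      System.out.println(status.getPath() + " offset=" + loc.getOffset()
          + " hosts=" + String.join(",", loc.getHosts()));
    }
  }
}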
/**
 * Test {@code BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len)}.
 * Test the different combinations of start and len.
 */
@Test
public void basicBlockLocationTest() throws Exception {
  long start = 0;
  long len = 0;
  FileStatus fStatus = sTFS.getFileStatus(new Path("/testFile1"));

  // block0.offset = start < start+len < block1.offset
  start = 0;
  len = BLOCK_SIZE - 1;
  Assert.assertEquals(1, sTFS.getFileBlockLocations(fStatus, start, len).length);

  // block0.offset < start < start+len < block1.offset
  start = 1;
  len = BLOCK_SIZE - 2;
  Assert.assertEquals(1, sTFS.getFileBlockLocations(fStatus, start, len).length);

  // block0.offset < start = start+len < block1.offset
  start = 1;
  len = 0;
  Assert.assertEquals(1, sTFS.getFileBlockLocations(fStatus, start, len).length);

  // block0.offset = start < start+len = block1.offset
  start = 0;
  len = BLOCK_SIZE;
  Assert.assertEquals(2, sTFS.getFileBlockLocations(fStatus, start, len).length);

  // block0.offset = start < block1.offset < start+len < block2.offset
  start = 0;
  len = BLOCK_SIZE + 1;
  Assert.assertEquals(2, sTFS.getFileBlockLocations(fStatus, start, len).length);

  // block0.offset < start < block1.offset < start+len < block2.offset
  start = 1;
  len = BLOCK_SIZE;
  Assert.assertEquals(2, sTFS.getFileBlockLocations(fStatus, start, len).length);

  // block0.offset = start < start+len = block2.offset
  start = 0;
  len = BLOCK_SIZE * 2;
  Assert.assertEquals(3, sTFS.getFileBlockLocations(fStatus, start, len).length);

  // block0.offset = start < start+len = file.len
  start = 0;
  len = FILE_LEN;
  Assert.assertEquals(3, sTFS.getFileBlockLocations(fStatus, start, len).length);

  // file.len < start < start+len
  start = FILE_LEN + 1;
  len = 1;
  Assert.assertEquals(0, sTFS.getFileBlockLocations(fStatus, start, len).length);
}
/**
 * Generate the list of files and make them into FileSplits. This needs to be copied here so
 * that a filter for acceptable data can be inserted.
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
  long maxSize = getMaxSplitSize(job);
  long desiredMappers =
      job.getConfiguration().getLong("org.systemsbiology.jxtandem.DesiredXMLInputMappers", 0);

  // generate splits
  List<InputSplit> splits = new ArrayList<InputSplit>();
  List<FileStatus> fileStatuses = listStatus(job);
  boolean forceNumberMappers = fileStatuses.size() == 1;
  for (FileStatus file : fileStatuses) {
    Path path = file.getPath();
    if (!isPathAcceptable(path)) { // filter out unacceptable data
      continue;
    }
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    long length = file.getLen();
    BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
    if ((length != 0) && isSplitable(job, path)) {
      long blockSize = file.getBlockSize();
      // use the desired mapper count to force more splits
      if (forceNumberMappers && desiredMappers > 0) {
        maxSize = Math.min(maxSize, (length / desiredMappers));
      }
      long splitSize = computeSplitSize(blockSize, minSize, maxSize);

      long bytesRemaining = length;
      while (withinSlop(splitSize, bytesRemaining)) {
        int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
        splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
            blkLocations[blkIndex].getHosts()));
        bytesRemaining -= splitSize;
      }

      if (bytesRemaining != 0) {
        splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
            blkLocations[blkLocations.length - 1].getHosts()));
      }
    } else if (length != 0) {
      splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
    } else {
      // create an empty hosts array for zero-length files
      splits.add(new FileSplit(path, 0, length, new String[0]));
    }
  }
  System.out.println("Total # of splits: " + splits.size());
  // LOG.debug("Total # of splits: " + splits.size());
  return splits;
}
public List<InputSplit> getSplits(JobContext job) throws IOException {
  Configuration conf = job.getConfiguration();
  int numMapTasks = conf.getInt("admm.iteration.num.map.tasks", 0);
  if (0 == numMapTasks) {
    return super.getSplits(job);
  }

  // generate splits
  List<InputSplit> splits = new ArrayList<InputSplit>();
  List<FileStatus> files = listStatus(job);
  for (FileStatus file : files) {
    Path path = file.getPath();
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    long length = file.getLen();
    BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
    if ((length != 0) && isSplitable(job, path)) {
      long blockSize = file.getBlockSize();
      long splitSize = Math.max(computeSplitSize(JAVA_OPTS, numMapTasks, length), blockSize);
      long splitLength = (long) (length / Math.ceil((double) length / splitSize));
      long bytesRemaining = length;
      while (((double) bytesRemaining) / splitLength > SPLIT_SLOP) {
        int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
        splits.add(new FileSplit(path, length - bytesRemaining, splitLength,
            blkLocations[blkIndex].getHosts()));
        bytesRemaining -= splitLength;
      }

      if (bytesRemaining != 0) {
        splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
            blkLocations[blkLocations.length - 1].getHosts()));
      }
    } else if (length != 0) {
      splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
    } else {
      splits.add(new FileSplit(path, 0, length, new String[0]));
    }
  }

  // save the number of input files in the job conf
  job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
  job.getConfiguration().setInt("admm.iteration.num.map.tasks", splits.size());
  return splits;
}
@Override
public List<String> getFileLocations(String path, long offset) throws IOException {
  List<String> ret = new ArrayList<String>();
  try {
    FileStatus fStatus = mFs.getFileStatus(new Path(path));
    BlockLocation[] bLocations = mFs.getFileBlockLocations(fStatus, offset, 1);
    if (bLocations.length > 0) {
      String[] names = bLocations[0].getNames();
      Collections.addAll(ret, names);
    }
  } catch (IOException e) {
    LOG.error("Unable to get file location for " + path, e);
  }
  return ret;
}
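// Hedged usage sketch, not part of the original source: resolve which datanodes serve the block
// containing a given byte offset via getFileLocations above. The path and offset are
// hypothetical examples, and logHostsForOffset is a helper introduced only for illustration.
void logHostsForOffset(String path, long offset) throws IOException {
  List<String> hosts = getFileLocations(path, offset);
  if (hosts.isEmpty()) {
    LOG.warn("No block location information found for " + path + " at offset " + offset);
  } else {
    LOG.info("Offset " + offset + " of " + path + " is served by " + hosts);
  }
}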
private void waitForBlocks(FileSystem fileSys, Path name) throws IOException {
  // wait until we have at least one block in the file to read
  boolean done = false;
  while (!done) {
    try {
      Thread.sleep(1000);
    } catch (InterruptedException e) {
    }
    done = true;
    BlockLocation[] locations =
        fileSys.getFileBlockLocations(fileSys.getFileStatus(name), 0, blockSize);
    if (locations.length < 1) {
      done = false;
      continue;
    }
  }
}
static void checkFullFile(FileSystem fs, Path name) throws IOException {
  FileStatus stat = fs.getFileStatus(name);
  BlockLocation[] locations = fs.getFileBlockLocations(stat, 0, fileSize);
  for (int idx = 0; idx < locations.length; idx++) {
    String[] hosts = locations[idx].getNames();
    for (int i = 0; i < hosts.length; i++) {
      System.out.print(hosts[i] + " ");
    }
    System.out.println(" off " + locations[idx].getOffset()
        + " len " + locations[idx].getLength());
  }

  byte[] expected = AppendTestUtil.randomBytes(seed, fileSize);
  FSDataInputStream stm = fs.open(name);
  byte[] actual = new byte[fileSize];
  stm.readFully(0, actual);
  checkData(actual, 0, expected, "Read 2");
  stm.close();
}
@Override
public BlockLocation[] getFileBlockLocations(final FileStatus file, final long start,
    final long len) throws IOException {
  if (!(file instanceof HadoopFileStatus)) {
    throw new IOException("file is not an instance of HadoopFileStatus");
  }

  final HadoopFileStatus f = (HadoopFileStatus) file;
  final org.apache.hadoop.fs.BlockLocation[] blkLocations =
      fs.getFileBlockLocations(f.getInternalFileStatus(), start, len);

  // wrap the HDFS-specific block location objects
  final HadoopBlockLocation[] distBlkLocations = new HadoopBlockLocation[blkLocations.length];
  for (int i = 0; i < distBlkLocations.length; i++) {
    distBlkLocations[i] = new HadoopBlockLocation(blkLocations[i]);
  }
  return distBlkLocations;
}
/**
 * @param hadoopConf
 * @param bucket bucket to be processed by this split
 * @param files actual files this split should process. It is assumed the caller has already
 *     parsed out the files in base and deltas to populate this list.
 * @param base directory of the base, or the partition/table location if the files are in the
 *     old style. Can be null.
 * @param deltas directories of the delta files
 * @throws IOException
 */
CompactorInputSplit(Configuration hadoopConf, int bucket, List<Path> files, Path base,
    Path[] deltas) throws IOException {
  bucketNum = bucket;
  this.base = base;
  this.deltas = deltas;
  locations = new ArrayList<String>();

  for (Path path : files) {
    FileSystem fs = path.getFileSystem(hadoopConf);
    FileStatus stat = fs.getFileStatus(path);
    length += stat.getLen();
    BlockLocation[] locs = fs.getFileBlockLocations(stat, 0, length);
    for (int i = 0; i < locs.length; i++) {
      String[] hosts = locs[i].getHosts();
      for (int j = 0; j < hosts.length; j++) {
        locations.add(hosts[j]);
      }
    }
  }
}
//
// verify that the data written to the full blocks are sane
//
private void checkFile(FileSystem fileSys, Path name, int repl) throws IOException {
  boolean done = false;

  // wait till all full blocks are confirmed by the datanodes
  while (!done) {
    try {
      Thread.sleep(1000);
    } catch (InterruptedException e) {
    }
    done = true;
    BlockLocation[] locations =
        fileSys.getFileBlockLocations(fileSys.getFileStatus(name), 0, fileSize);
    if (locations.length < numBlocks) {
      done = false;
      continue;
    }
    for (int idx = 0; idx < locations.length; idx++) {
      if (locations[idx].getHosts().length < repl) {
        done = false;
        break;
      }
    }
  }

  FSDataInputStream stm = fileSys.open(name);
  final byte[] expected;
  if (simulatedStorage) {
    expected = new byte[numBlocks * blockSize];
    for (int i = 0; i < expected.length; i++) {
      expected[i] = SimulatedFSDataset.DEFAULT_DATABYTE;
    }
  } else {
    expected = AppendTestUtil.randomBytes(seed, numBlocks * blockSize);
  }

  // do a sanity check: read the file
  byte[] actual = new byte[numBlocks * blockSize];
  stm.readFully(0, actual);
  stm.close();
  checkData(actual, 0, expected, "Read 1");
}
/** Wait for the file's replication to be done. */
public static void waitReplication(FileSystem fs, Path fileName, short replFactor)
    throws IOException {
  boolean good;
  do {
    good = true;
    BlockLocation[] locs =
        fs.getFileBlockLocations(fs.getFileStatus(fileName), 0, Long.MAX_VALUE);
    for (int j = 0; j < locs.length; j++) {
      String[] hostnames = locs[j].getNames();
      if (hostnames.length != replFactor) {
        String hostNameList = "";
        for (String h : hostnames) {
          hostNameList += h + " ";
        }
        System.out.println("Block " + j + " of file " + fileName + " has replication factor "
            + hostnames.length + "; locations " + hostNameList);
        good = false;
        try {
          System.out.println("Waiting for replication factor to drain");
          Thread.sleep(100);
        } catch (InterruptedException e) {
        }
        break;
      }
    }
    if (good) {
      System.out.println("All blocks of file " + fileName
          + " verified to have replication factor " + replFactor);
    }
  } while (!good);
}
private void testDataNodeRedirect(Path path) throws IOException {
  // create the file
  if (hdfs.exists(path)) {
    hdfs.delete(path, true);
  }
  FSDataOutputStream out = hdfs.create(path, (short) 1);
  out.writeBytes("0123456789");
  out.close();

  // Get the path's block location so we can determine
  // whether we were redirected to the right DN.
  FileStatus status = hdfs.getFileStatus(path);
  BlockLocation[] locations = hdfs.getFileBlockLocations(status, 0, 10);
  String locationName = locations[0].getNames()[0];

  // connect to the NN to get redirected
  URL u = hftpFs.getNamenodeURL(
      "/data" + ServletUtil.encodePath(path.toUri().getPath()), "ugi=userx,groupy");
  HttpURLConnection conn = (HttpURLConnection) u.openConnection();
  HttpURLConnection.setFollowRedirects(true);
  conn.connect();
  conn.getInputStream();

  boolean checked = false;
  // Find the datanode that has the block according to locations
  // and check that the URL was redirected to this DN's info port.
  for (DataNode node : cluster.getDataNodes()) {
    DatanodeRegistration dnR = node.dnRegistration;
    if (dnR.getName().equals(locationName)) {
      checked = true;
      assertEquals(dnR.getInfoPort(), conn.getURL().getPort());
    }
  }
  assertTrue("The test never checked that the location of the block and the hftp destination"
      + " are the same", checked);
}
//
// verify that the data written to the full blocks are sane
//
private void checkFile(FileSystem fileSys, Path name, int repl) throws IOException {
  boolean done = false;

  // wait till all full blocks are confirmed by the datanodes
  while (!done) {
    try {
      Thread.sleep(1000);
    } catch (InterruptedException e) {
    }
    done = true;
    BlockLocation[] locations = fileSys.getFileBlockLocations(
        fileSys.getFileStatus(name), 0, AppendTestUtil.FILE_SIZE);
    if (locations.length < AppendTestUtil.NUM_BLOCKS) {
      System.out.println("Number of blocks found " + locations.length);
      done = false;
      continue;
    }
    for (int idx = 0; idx < AppendTestUtil.NUM_BLOCKS; idx++) {
      if (locations[idx].getHosts().length < repl) {
        System.out.println("Block index " + idx + " not yet replicated.");
        done = false;
        break;
      }
    }
  }

  byte[] expected = new byte[AppendTestUtil.NUM_BLOCKS * AppendTestUtil.BLOCK_SIZE];
  if (simulatedStorage) {
    for (int i = 0; i < expected.length; i++) {
      expected[i] = SimulatedFSDataset.DEFAULT_DATABYTE;
    }
  } else {
    System.arraycopy(fileContents, 0, expected, 0, expected.length);
  }

  // do a sanity check: read the file
  AppendTestUtil.checkFullFile(
      fileSys, name, AppendTestUtil.NUM_BLOCKS * AppendTestUtil.BLOCK_SIZE, expected, "Read 1");
}
static int blocksInFile(FileSystem fs, Path path, long len) throws IOException {
  FileStatus f = fs.getFileStatus(path);
  return fs.getFileBlockLocations(f, 0L, len).length;
}
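// Hedged usage sketch, not from the original source: report how many blocks an entire file spans
// by passing its full length to blocksInFile above. Assumes the standard org.apache.hadoop.fs
// imports used elsewhere in this file; printBlockCount is a helper introduced only for illustration.
static void printBlockCount(FileSystem fs, Path path) throws IOException {
  long len = fs.getFileStatus(path).getLen();
  int numBlocks = blocksInFile(fs, path, len);
  System.out.println(path + " spans " + numBlocks + " block(s)");
}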
/**
 * Generate the list of files and make them into FileSplits.
 *
 * @param job the job context
 * @throws IOException
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
  Stopwatch sw = new Stopwatch().start();
  long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
  long maxSize = getMaxSplitSize(job);

  // generate splits
  List<InputSplit> splits = new ArrayList<InputSplit>();
  List<FileStatus> files = listStatus(job);
  for (FileStatus file : files) {
    Path path = file.getPath();
    long length = file.getLen();
    if (length != 0) {
      BlockLocation[] blkLocations;
      if (file instanceof LocatedFileStatus) {
        blkLocations = ((LocatedFileStatus) file).getBlockLocations();
      } else {
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        blkLocations = fs.getFileBlockLocations(file, 0, length);
      }
      if (isSplitable(job, path)) {
        long blockSize = file.getBlockSize();
        long splitSize = computeSplitSize(blockSize, minSize, maxSize);

        long bytesRemaining = length;
        while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
          int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
          splits.add(makeSplit(path, length - bytesRemaining, splitSize,
              blkLocations[blkIndex].getHosts()));
          bytesRemaining -= splitSize;
        }

        if (bytesRemaining != 0) {
          int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
          splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
              blkLocations[blkIndex].getHosts()));
        }
      } else { // not splitable
        splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts()));
      }
    } else {
      // create an empty hosts array for zero-length files
      splits.add(makeSplit(path, 0, length, new String[0]));
    }
  }

  // save the number of input files for metrics/loadgen
  job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
  sw.stop();
  if (LogGlobal.isDebugEnabled()) {
    /* LOG.debug("Total # of splits generated by getSplits: "+splits.size()+", TimeTaken: "+sw.elapsedMillis()) */
    LOG.total_splits_generated_getsplits_timetak(
            String.valueOf(splits.size()), String.valueOf(sw.elapsedMillis()))
        .tag("methodCall")
        .debug();
  }
  return splits;
}
@Test
public void testHighAvailability() throws IOException {
  Configuration conf = new HdfsConfiguration();
  // Create a cluster with 3 readers and 1 writer
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHOPSTopology(4))
      .numDataNodes(2)
      .format(true)
      .build();
  cluster.waitActive();

  try {
    // Get the filesystem and create a directory
    FileSystem fs = cluster.getFileSystem(0);

    // Write operation should work since we have one writer
    assertTrue(fs.mkdirs(dir));

    // Write operation - create a file and write something to it
    Path file1 = new Path(dir, "file1");
    createFile(fs, file1);

    // Read operation - the file should exist
    assertTrue(fs.exists(file1));

    // Read operation - list files in this directory
    assertEquals(1, list(fs));

    // Read operation - get file status
    FileStatus fileStatus = fs.listStatus(dir)[0];

    // Read operation - get block locations
    assertNotSame(0, fs.getFileBlockLocations(file1, 0, 1).length);

    // Now we kill all namenodes except the last two
    cluster.getNameNode(0).stop();
    cluster.getNameNode(1).stop();

    // Now let's read again - these operations should still be possible
    assertTrue(fs.exists(file1));

    // Write operation - create more files
    Path file2 = new Path(dir, "file2");
    createFile(fs, file2);
    assertTrue(fs.exists(file2));

    Path file3 = new Path(dir, "file3");
    createFile(fs, file3);
    assertTrue(fs.exists(file3));

    Path file4 = new Path(dir, "file4");

    // Read operation - list files (3 files created now under this directory)
    assertEquals(3, list(fs));

    // Write operation - rename
    // [S] commented out because rename is not yet supported
    // ((DistributedFileSystem) fs).rename(file1, file4);

    // Kill another namenode
    cluster.getNameNode(2).stop();

    // Read operation - file status
    fs.getFileStatus(file2);

    // Write operation - delete
    assertTrue(fs.delete(dir, true));
  } catch (IOException ex) {
    // Any connectivity issue here is a problem; connectivity issues are handled
    // in the code above.
    LOG.error(ex);
    ex.printStackTrace();
    assertFalse("Cannot be any connectivity issues", ex instanceof ConnectException);
    fail();
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
@SuppressWarnings("unchecked") @Override /** * Splits the input collection into sets of files where each Map task gets about the same number * of files */ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { Path[] paths = FileInputFormat.getInputPaths(job); // HADOOP-1818: Manage splits only if there are paths if (paths.length == 0) { return new InputSplit[0]; } if (numSplits > paths.length) { numSplits = paths.length; } else if (numSplits < 1) { numSplits = 1; } logger.info("Allocating " + paths.length + " files across " + numSplits + " map tasks"); List<PositionAwareSplit<CombineFileSplit>> splits = new ArrayList<PositionAwareSplit<CombineFileSplit>>(numSplits); final int numPaths = paths.length; long[] lengths = new long[numPaths]; TObjectLongHashMap<String>[] locations = (TObjectLongHashMap<String>[]) Array.newInstance(TObjectLongHashMap.class, numPaths); final FileSystem fs = FileSystem.get(job); for (int i = 0; i < paths.length; i++) { final FileStatus fss = fs.getFileStatus(paths[i]); lengths[i] = fss.getLen(); final TObjectLongHashMap<String> location2size = locations[i] = new TObjectLongHashMap<String>(); final long normalblocksize = fss.getBlockSize(); for (long offset = 0; offset < lengths[i]; offset += normalblocksize) { final long blocksize = Math.min(offset + normalblocksize, lengths[i]); final BlockLocation[] blockLocations = fs.getFileBlockLocations(fss, offset, blocksize); for (BlockLocation bl : blockLocations) { for (String host : bl.getHosts()) { location2size.adjustOrPutValue(host, blocksize, blocksize); } } } } // we need to over-estimate using ceil, to ensure that the last split is not /too/ big final int numberOfFilesPerSplit = (int) Math.ceil((double) paths.length / (double) numSplits); int pathsUsed = 0; int splitnum = 0; CombineFileSplit mfs; // for each split except the last one (which may be smaller than numberOfFilesPerSplit) while (pathsUsed < numPaths) { /* caclulate split size for this task - usually numberOfFilesPerSplit, but * less than this for the last split */ final int splitSizeForThisSplit = numberOfFilesPerSplit + pathsUsed > numPaths ? numPaths - pathsUsed : numberOfFilesPerSplit; // arrays of information for split Path[] splitPaths = new Path[splitSizeForThisSplit]; long[] splitLengths = new long[splitSizeForThisSplit]; long[] splitStarts = new long[splitSizeForThisSplit]; final TObjectLongHashMap<String> allLocationsForSplit = new TObjectLongHashMap<String>(); String[] splitLocations = null; // final recommended locations for this split. 
for (int i = 0; i < splitSizeForThisSplit; i++) { locations[pathsUsed + i].forEachEntry( new TObjectLongProcedure<String>() { public boolean execute(String a, long b) { allLocationsForSplit.adjustOrPutValue(a, b, b); return true; } }); if (allLocationsForSplit.size() <= 3) { splitLocations = allLocationsForSplit.keys(new String[allLocationsForSplit.size()]); } else { String[] hosts = allLocationsForSplit.keys(new String[allLocationsForSplit.size()]); Arrays.sort( hosts, new Comparator<String>() { public int compare(String o1, String o2) { long diffamount = allLocationsForSplit.get(o1) - allLocationsForSplit.get(o2); if (diffamount > 0) { return -1; } else if (diffamount < 0) { return 1; } return 0; } }); splitLocations = new String[3]; System.arraycopy(hosts, 0, splitLocations, 0, 3); } } // copy information for this split System.arraycopy(lengths, pathsUsed, splitLengths, 0, splitSizeForThisSplit); System.arraycopy(paths, pathsUsed, splitPaths, 0, splitSizeForThisSplit); // count the number of paths consumed pathsUsed += splitSizeForThisSplit; // make the actual split object // logger.info("New split of size " + splitSizeForThisSplit); mfs = new CombineFileSplit(job, splitPaths, splitStarts, splitLengths, splitLocations); splits.add(new PositionAwareSplit<CombineFileSplit>(mfs, splitnum)); splitnum++; } if (!(pathsUsed == paths.length)) { throw new IOException("Number of used paths does not equal total available paths!"); } return splits.toArray(new PositionAwareSplit[splits.size()]); }