/* * Fetch a file that is in a Hadoop file system. Return a local File. * Interruptible. */ private File hdfsFetch(Path fromPath, Reporter reporter) throws IOException, InterruptedException { UUID uniqueId = UUID.randomUUID(); File toFile = new File(tempDir, uniqueId.toString() + "/" + fromPath.getName()); File toDir = new File(toFile.getParent()); if (toDir.exists()) { FileUtils.deleteDirectory(toDir); } toDir.mkdirs(); Path toPath = new Path(toFile.getCanonicalPath()); FileSystem fS = fromPath.getFileSystem(hadoopConf); FileSystem tofS = FileSystem.getLocal(hadoopConf); Throttler throttler = new Throttler((double) bytesPerSecThrottle); try { for (FileStatus fStatus : fS.globStatus(fromPath)) { log.info("Copying " + fStatus.getPath() + " to " + toPath); long bytesSoFar = 0; FSDataInputStream iS = fS.open(fStatus.getPath()); FSDataOutputStream oS = tofS.create(toPath); byte[] buffer = new byte[downloadBufferSize]; int nRead; while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) { // Needed to being able to be interrupted at any moment. if (Thread.interrupted()) { iS.close(); oS.close(); cleanDirNoExceptions(toDir); throw new InterruptedException(); } bytesSoFar += nRead; oS.write(buffer, 0, nRead); throttler.incrementAndThrottle(nRead); if (bytesSoFar >= bytesToReportProgress) { reporter.progress(bytesSoFar); bytesSoFar = 0l; } } if (reporter != null) { reporter.progress(bytesSoFar); } oS.close(); iS.close(); } return toDir; } catch (ClosedByInterruptException e) { // This can be thrown by the method read. cleanDirNoExceptions(toDir); throw new InterruptedIOException(); } }
protected void waitForOpenSlot(int maxProcessesOnNode, Reporter reporter) throws IOException, InterruptedException { while (true) { // sleep for a random length of time between 0 and 60 seconds long sleepTime = (long) (Math.random() * 1000 * 60); logger.info("sleeping for " + sleepTime); Thread.sleep(sleepTime); int numRunningMappers = getNumRunningMappers(); logger.info("num running mappers: " + numRunningMappers); if (numRunningMappers < maxProcessesOnNode) return; reporter.progress(); } }
public void map( LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { if (this.output == null) { this.output = output; } if (this.reporter == null) { this.reporter = reporter; } String line = value.toString(); String[] reads = line.split("\n"); splitRead(key, reads[0], s1FileWriter); splitRead(key, reads[1], getRead2FileWriter()); reporter.progress(); }
/** In case of interrupted, written file is not deleted. */ private void copyFile(File sourceFile, File destFile, Reporter reporter) throws IOException, InterruptedException { if (!destFile.exists()) { destFile.createNewFile(); } FileChannel source = null; FileChannel destination = null; Throttler throttler = new Throttler((double) bytesPerSecThrottle); FileInputStream iS = null; FileOutputStream oS = null; try { iS = new FileInputStream(sourceFile); oS = new FileOutputStream(destFile); source = iS.getChannel(); destination = oS.getChannel(); long bytesSoFar = 0; long reportingBytesSoFar = 0; long size = source.size(); int transferred = 0; while (bytesSoFar < size) { // Needed to being able to be interrupted at any moment. if (Thread.interrupted()) { throw new InterruptedException(); } // Casting to int here is safe since we will transfer at most "downloadBufferSize" bytes. // This is done on purpose for being able to implement Throttling. transferred = (int) destination.transferFrom(source, bytesSoFar, downloadBufferSize); bytesSoFar += transferred; reportingBytesSoFar += transferred; throttler.incrementAndThrottle(transferred); if (reportingBytesSoFar >= bytesToReportProgress) { reporter.progress(reportingBytesSoFar); reportingBytesSoFar = 0l; } } if (reporter != null) { reporter.progress(reportingBytesSoFar); } } catch (InterruptedException e) { e.printStackTrace(); } finally { if (iS != null) { iS.close(); } if (oS != null) { oS.close(); } if (source != null) { source.close(); } if (destination != null) { destination.close(); } } }
/* * Fetch a file that is in a S3 file system. Return a local File. It accepts "s3://" and "s3n://" prefixes. * Interruptible. */ private File s3Fetch(URI uri, Reporter reporter) throws IOException, InterruptedException { String bucketName = uri.getHost(); String path = uri.getPath(); UUID uniqueId = UUID.randomUUID(); File destFolder = new File(tempDir, uniqueId.toString() + "/" + path); if (destFolder.exists()) { FileUtils.deleteDirectory(destFolder); } destFolder.mkdirs(); Throttler throttler = new Throttler((double) bytesPerSecThrottle); boolean done = false; try { s3Service = new RestS3Service(getCredentials()); if (s3Service.checkBucketStatus(bucketName) != RestS3Service.BUCKET_STATUS__MY_BUCKET) { throw new IOException("Bucket doesn't exist or is already claimed: " + bucketName); } if (path.startsWith("/")) { path = path.substring(1, path.length()); } for (S3Object object : s3Service.listObjects(new S3Bucket(bucketName), path, "")) { long bytesSoFar = 0; String fileName = path; if (path.contains("/")) { fileName = path.substring(path.lastIndexOf("/") + 1, path.length()); } File fileDest = new File(destFolder, fileName); log.info("Downloading " + object.getKey() + " to " + fileDest + " ..."); if (fileDest.exists()) { fileDest.delete(); } object = s3Service.getObject(new S3Bucket(bucketName), object.getKey()); InputStream iS = object.getDataInputStream(); FileOutputStream writer = new FileOutputStream(fileDest); byte[] buffer = new byte[downloadBufferSize]; int nRead; while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) { // Needed to being able to be interrupted at any moment. if (Thread.interrupted()) { iS.close(); writer.close(); cleanDirNoExceptions(destFolder); throw new InterruptedException(); } bytesSoFar += nRead; writer.write(buffer, 0, nRead); throttler.incrementAndThrottle(nRead); if (bytesSoFar >= bytesToReportProgress) { reporter.progress(bytesSoFar); bytesSoFar = 0l; } } if (reporter != null) { reporter.progress(bytesSoFar); } writer.close(); iS.close(); done = true; } if (!done) { throw new IOException("Bucket is empty! " + bucketName + " path: " + path); } } catch (S3ServiceException e) { throw new IOException(e); } return destFolder; }