/*
 * Fetch a file that is in a Hadoop file system. Return a local File.
 * Interruptible.
 */
private File hdfsFetch(Path fromPath, Reporter reporter) throws IOException, InterruptedException {
  UUID uniqueId = UUID.randomUUID();
  File toFile = new File(tempDir, uniqueId.toString() + "/" + fromPath.getName());
  File toDir = toFile.getParentFile();
  if (toDir.exists()) {
    FileUtils.deleteDirectory(toDir);
  }
  toDir.mkdirs();
  FileSystem fS = fromPath.getFileSystem(hadoopConf);
  FileSystem tofS = FileSystem.getLocal(hadoopConf);
  Throttler throttler = new Throttler((double) bytesPerSecThrottle);
  try {
    for (FileStatus fStatus : fS.globStatus(fromPath)) {
      // Derive the destination from each matched file so that a glob which
      // matches several files does not overwrite a single local target.
      Path toPath = new Path(new File(toDir, fStatus.getPath().getName()).getCanonicalPath());
      log.info("Copying " + fStatus.getPath() + " to " + toPath);
      long bytesSoFar = 0;
      FSDataInputStream iS = fS.open(fStatus.getPath());
      FSDataOutputStream oS = tofS.create(toPath);
      byte[] buffer = new byte[downloadBufferSize];
      int nRead;
      while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
        // Needed to be able to be interrupted at any moment.
        if (Thread.interrupted()) {
          iS.close();
          oS.close();
          cleanDirNoExceptions(toDir);
          throw new InterruptedException();
        }
        bytesSoFar += nRead;
        oS.write(buffer, 0, nRead);
        throttler.incrementAndThrottle(nRead);
        if (bytesSoFar >= bytesToReportProgress) {
          if (reporter != null) { // reporter may be null, as the final report below assumes
            reporter.progress(bytesSoFar);
          }
          bytesSoFar = 0L;
        }
      }
      if (reporter != null) {
        reporter.progress(bytesSoFar);
      }
      oS.close();
      iS.close();
    }
    return toDir;
  } catch (ClosedByInterruptException e) {
    // read() throws this when the thread is interrupted mid-call.
    cleanDirNoExceptions(toDir);
    throw new InterruptedIOException();
  }
}
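/*
 * The Throttler used by the fetch and copy methods is not shown in this
 * section. Below is a minimal sketch, assuming incrementAndThrottle(n)
 * sleeps just long enough to keep the average transfer rate at or below
 * the configured bytes/second, and that a rate <= 0 disables throttling.
 * This is an inferred implementation, not the original class.
 */
public class Throttler {

  private final double bytesPerSec;
  private final long startTime = System.currentTimeMillis();
  private long bytesSoFar = 0;

  public Throttler(double bytesPerSec) {
    this.bytesPerSec = bytesPerSec;
  }

  public void incrementAndThrottle(int bytes) {
    if (bytesPerSec <= 0) { // throttling disabled
      return;
    }
    bytesSoFar += bytes;
    // How long the transfer *should* have taken at the allowed rate,
    // versus how long it actually took; sleep off any surplus.
    long expectedMs = (long) (bytesSoFar / bytesPerSec * 1000);
    long elapsedMs = System.currentTimeMillis() - startTime;
    long surplusMs = expectedMs - elapsedMs;
    if (surplusMs > 0) {
      try {
        Thread.sleep(surplusMs);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt(); // preserve interrupt status
      }
    }
  }
}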
private void copyFile(File sourceFile, File destFile, Reporter reporter) throws IOException {
  if (!destFile.exists()) {
    destFile.createNewFile();
  }
  FileChannel source = null;
  FileChannel destination = null;
  Throttler throttler = new Throttler((double) bytesPerSecThrottle);
  FileInputStream iS = null;
  FileOutputStream oS = null;
  try {
    iS = new FileInputStream(sourceFile);
    oS = new FileOutputStream(destFile);
    source = iS.getChannel();
    destination = oS.getChannel();
    long bytesSoFar = 0;
    long reportingBytesSoFar = 0;
    long size = source.size();
    int transferred = 0;
    while (bytesSoFar < size) {
      // Casting to int is safe: we transfer at most "downloadBufferSize"
      // bytes per call. Copying in small chunks is deliberate, so that
      // throttling can be applied between chunks.
      transferred = (int) destination.transferFrom(source, bytesSoFar, downloadBufferSize);
      bytesSoFar += transferred;
      reportingBytesSoFar += transferred;
      throttler.incrementAndThrottle(transferred);
      if (reportingBytesSoFar >= bytesToReportProgress) {
        if (reporter != null) { // reporter may be null, as the final report below assumes
          reporter.progress(reportingBytesSoFar);
        }
        reportingBytesSoFar = 0L;
      }
    }
    if (reporter != null) {
      reporter.progress(reportingBytesSoFar);
    }
  } finally {
    if (iS != null) {
      iS.close();
    }
    if (oS != null) {
      oS.close();
    }
    if (source != null) {
      source.close();
    }
    if (destination != null) {
      destination.close();
    }
  }
}
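/*
 * The Reporter that hdfsFetch, copyFile, and s3Fetch call with a byte count
 * is not Hadoop's org.apache.hadoop.mapred.Reporter, whose progress() takes
 * no arguments (that one appears in the MapTask methods further down).
 * Below is a minimal sketch of the callback interface these copy methods
 * appear to assume; the name and signature are inferred, not confirmed:
 */
public interface Reporter {

  // Called roughly every bytesToReportProgress bytes with the number of
  // bytes consumed since the previous call.
  void progress(long consumed);
}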
public synchronized void flush() throws IOException {
  LOG.info("Starting flush of map output");
  synchronized (spillLock) {
    while (kvstart != kvend) {
      try {
        reporter.progress();
        spillLock.wait();
      } catch (InterruptedException e) {
        throw (IOException) new IOException("Buffer interrupted while waiting for the writer").initCause(e);
      }
    }
  }
  if (sortSpillException != null) {
    throw (IOException) new IOException("Spill failed").initCause(sortSpillException);
  }
  if (kvend != kvindex) {
    LOG.info("bufstart = " + bufstart + "; bufend = " + bufmark + "; bufvoid = " + bufvoid);
    LOG.info("kvstart = " + kvstart + "; kvend = " + kvindex + "; length = " + kvoffsets.length);
    kvend = kvindex;
    bufend = bufmark;
    sortAndSpill();
  }
  // release sort buffer before the merge
  kvbuffer = null;
  mergeParts();
}
/*
 * Fetch a file that is in a Hadoop file system. Return a local File.
 */
private File hdfsFetch(Path fromPath, Reporter reporter) throws IOException {
  File toFile = new File(tempDir, fromPath.toUri().getPath());
  File toDir = toFile.getParentFile();
  if (toDir.exists()) {
    FileUtils.deleteDirectory(toDir);
  }
  toDir.mkdirs();
  FileSystem fS = fromPath.getFileSystem(hadoopConf);
  FileSystem tofS = FileSystem.getLocal(hadoopConf);
  Throttler throttler = new Throttler((double) bytesPerSecThrottle);
  for (FileStatus fStatus : fS.globStatus(fromPath)) {
    // As in the interruptible variant above, derive the destination from
    // each matched file so that a multi-file glob does not overwrite a
    // single local target.
    Path toPath = new Path(new File(toDir, fStatus.getPath().getName()).getCanonicalPath());
    log.info("Copying " + fStatus.getPath() + " to " + toPath);
    long bytesSoFar = 0;
    FSDataInputStream iS = fS.open(fStatus.getPath());
    FSDataOutputStream oS = tofS.create(toPath);
    byte[] buffer = new byte[downloadBufferSize];
    int nRead;
    while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
      bytesSoFar += nRead;
      oS.write(buffer, 0, nRead);
      throttler.incrementAndThrottle(nRead);
      if (bytesSoFar >= bytesToReportProgress) {
        if (reporter != null) { // reporter may be null, as the final report below assumes
          reporter.progress(bytesSoFar);
        }
        bytesSoFar = 0L;
      }
    }
    if (reporter != null) {
      reporter.progress(bytesSoFar);
    }
    oS.close();
    iS.close();
  }
  return toDir;
}
protected void waitForOpenSlot(int maxProcessesOnNode, Reporter reporter)
    throws IOException, InterruptedException {
  while (true) {
    // Sleep for a random length of time between 0 and 60 seconds; the
    // jitter keeps competing mappers from polling in lockstep.
    long sleepTime = (long) (Math.random() * 1000 * 60);
    logger.info("sleeping for " + sleepTime);
    Thread.sleep(sleepTime);
    int numRunningMappers = getNumRunningMappers();
    logger.info("num running mappers: " + numRunningMappers);
    if (numRunningMappers < maxProcessesOnNode) {
      return;
    }
    reporter.progress();
  }
}
public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
    throws IOException {
  if (this.output == null) {
    this.output = output;
  }
  if (this.reporter == null) {
    this.reporter = reporter;
  }
  // Each input value holds a pair of reads separated by a newline; route
  // the first to the read-1 writer and the second to the read-2 writer.
  String line = value.toString();
  String[] reads = line.split("\n");
  splitRead(key, reads[0], s1FileWriter);
  splitRead(key, reads[1], getRead2FileWriter());
  reporter.progress();
}
@SuppressWarnings("unchecked")
private void combineAndSpill(RawKeyValueIterator kvIter, Counters.Counter inCounter) throws IOException {
  Reducer combiner = (Reducer) ReflectionUtils.newInstance(combinerClass, job);
  try {
    CombineValuesIterator values =
        new CombineValuesIterator(kvIter, comparator, keyClass, valClass, job, reporter, inCounter);
    while (values.more()) {
      combiner.reduce(values.getKey(), values, combineCollector, reporter);
      values.nextKey();
      // indicate we're making progress
      reporter.progress();
    }
  } finally {
    combiner.close();
  }
}
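/*
 * For context: combinerClass above is the user-supplied combiner registered
 * on the job configuration. A typical setup with the old mapred API looks
 * like the following sketch; MyJob and MyCombiner are made-up names for
 * illustration only:
 */
JobConf job = new JobConf(MyJob.class);
job.setCombinerClass(MyCombiner.class);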
@SuppressWarnings("unchecked")
public synchronized void collect(K key, V value) throws IOException {
  reporter.progress();
  if (key.getClass() != keyClass) {
    throw new IOException("Type mismatch in key from map: expected " + keyClass.getName()
        + ", received " + key.getClass().getName());
  }
  if (value.getClass() != valClass) {
    throw new IOException("Type mismatch in value from map: expected " + valClass.getName()
        + ", received " + value.getClass().getName());
  }
  if (sortSpillException != null) {
    throw (IOException) new IOException("Spill failed").initCause(sortSpillException);
  }
  try {
    // serialize key bytes into buffer
    int keystart = bufindex;
    keySerializer.serialize(key);
    if (bufindex < keystart) {
      // wrapped the key; reset required
      bb.reset();
      keystart = 0;
    }
    // serialize value bytes into buffer
    int valstart = bufindex;
    valSerializer.serialize(value);
    int valend = bb.markRecord();
    mapOutputByteCounter.increment(
        valend >= keystart ? valend - keystart : (bufvoid - keystart) + valend);
    if (keystart == bufindex) {
      // if emitted records make no writes, it's possible to wrap
      // accounting space without notice
      bb.write(new byte[0], 0, 0);
    }
    int partition = partitioner.getPartition(key, value, partitions);
    if (partition < 0 || partition >= partitions) {
      throw new IOException("Illegal partition for " + key + " (" + partition + ")");
    }
    mapOutputRecordCounter.increment(1);
    // update accounting info
    int ind = kvindex * ACCTSIZE;
    kvoffsets[kvindex] = ind;
    kvindices[ind + PARTITION] = partition;
    kvindices[ind + KEYSTART] = keystart;
    kvindices[ind + VALSTART] = valstart;
    kvindex = (kvindex + 1) % kvoffsets.length;
  } catch (MapBufferTooSmallException e) {
    LOG.info("Record too large for in-memory buffer: " + e.getMessage());
    spillSingleRecord(key, value);
    mapOutputRecordCounter.increment(1);
    return;
  }
}
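/*
 * For reference, the accounting constants collect() relies on, as they
 * appear in Hadoop 1.x's MapTask.MapOutputBuffer: each record owns
 * ACCTSIZE consecutive ints in kvindices, holding its partition and the
 * key/value start offsets within the serialization buffer.
 */
private static final int PARTITION = 0; // partition offset in acct
private static final int KEYSTART = 1;  // key offset in acct
private static final int VALSTART = 2;  // val offset in acct
private static final int ACCTSIZE = 3;  // total #fields in acct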
public void collect(K key, V value) throws IOException {
  reporter.progress();
  out.write(key, value);
  mapOutputRecordCounter.increment(1);
}
/*
 * Fetch a file that is in an S3 file system. Return a local File. It accepts "s3://" and "s3n://" prefixes.
 */
private File s3Fetch(URI uri, Reporter reporter) throws IOException {
  String bucketName = uri.getHost();
  String path = uri.getPath();
  File destFolder = new File(tempDir, bucketName + "/" + path);
  if (destFolder.exists()) {
    FileUtils.deleteDirectory(destFolder);
  }
  destFolder.mkdirs();
  Throttler throttler = new Throttler((double) bytesPerSecThrottle);
  boolean done = false;
  try {
    s3Service = new RestS3Service(getCredentials());
    if (s3Service.checkBucketStatus(bucketName) != RestS3Service.BUCKET_STATUS__MY_BUCKET) {
      throw new IOException("Bucket doesn't exist or is already claimed: " + bucketName);
    }
    if (path.startsWith("/")) {
      path = path.substring(1);
    }
    for (S3Object object : s3Service.listObjects(new S3Bucket(bucketName), path, "")) {
      long bytesSoFar = 0;
      // Name the local file after each listed object's key so that a
      // prefix matching several objects does not overwrite a single file.
      String key = object.getKey();
      String fileName = key.contains("/") ? key.substring(key.lastIndexOf("/") + 1) : key;
      File fileDest = new File(destFolder, fileName);
      log.info("Downloading " + object.getKey() + " to " + fileDest + " ...");
      if (fileDest.exists()) {
        fileDest.delete();
      }
      object = s3Service.getObject(new S3Bucket(bucketName), object.getKey());
      InputStream iS = object.getDataInputStream();
      FileOutputStream writer = new FileOutputStream(fileDest);
      byte[] buffer = new byte[downloadBufferSize];
      int nRead;
      while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
        bytesSoFar += nRead;
        writer.write(buffer, 0, nRead);
        throttler.incrementAndThrottle(nRead);
        if (bytesSoFar >= bytesToReportProgress) {
          if (reporter != null) { // reporter may be null, as the final report below assumes
            reporter.progress(bytesSoFar);
          }
          bytesSoFar = 0L;
        }
      }
      if (reporter != null) {
        reporter.progress(bytesSoFar);
      }
      writer.close();
      iS.close();
      done = true;
    }
    if (!done) {
      throw new IOException("Bucket is empty! " + bucketName + " path: " + path);
    }
  } catch (S3ServiceException e) {
    throw new IOException(e);
  }
  return destFolder;
}
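/*
 * getCredentials() is not shown in this section. A minimal sketch using
 * jets3t's AWSCredentials, assuming the keys are read from the Hadoop
 * configuration under the stock s3n property names; this is an assumption,
 * and the original class may obtain its credentials elsewhere:
 */
private AWSCredentials getCredentials() {
  // Standard Hadoop s3n configuration keys; swap in the project's own
  // property names if it defines any.
  String accessKey = hadoopConf.get("fs.s3n.awsAccessKeyId");
  String secretKey = hadoopConf.get("fs.s3n.awsSecretAccessKey");
  return new AWSCredentials(accessKey, secretKey);
}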