public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); m_Sb.setLength(0); m_Start = split.getStart(); m_End = m_Start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the m_Start of the split FileSystem fs = file.getFileSystem(job); // getFileStatus fileStatus = fs.getFileStatus(split.getPath()); //noinspection deprecation @SuppressWarnings(value = "deprecated") long length = fs.getLength(file); FSDataInputStream fileIn = fs.open(split.getPath()); if (m_Start > 0) fileIn.seek(m_Start); if (codec != null) { CompressionInputStream inputStream = codec.createInputStream(fileIn); m_Input = new BufferedReader(new InputStreamReader(inputStream)); m_End = length; } else { m_Input = new BufferedReader(new InputStreamReader(fileIn)); } m_Current = m_Start; m_Key = split.getPath().getName(); }
HarFsInputStream(FileSystem fs, Path path, long start, long length, int bufferSize) throws IOException { if (length < 0) { throw new IllegalArgumentException("Negative length [" + length + "]"); } underLyingStream = fs.open(path, bufferSize); underLyingStream.seek(start); // the start of this file in the part file this.start = start; // the position pointer in the part file this.position = start; // the end pointer in the part file this.end = start + length; }
@Override public synchronized long skip(long n) throws IOException { long tmpN = n; if (tmpN > 0) { final long actualRemaining = end - position; if (tmpN > actualRemaining) { tmpN = actualRemaining; } underLyingStream.seek(tmpN + position); position += tmpN; return tmpN; } // NB: the contract is described in java.io.InputStream.skip(long): // this method returns the number of bytes actually skipped, so, // the return value should never be negative. return 0; }
/**
 * Moves this stream to {@code pos}, an offset relative to {@code start}.
 *
 * <p>The offset is first checked by {@code validatePosition}. The tracked
 * {@code position} is updated only after the underlying seek succeeds, so a
 * failed seek leaves this stream's bookkeeping unchanged.
 *
 * @param pos offset relative to the start of this stream
 * @throws IOException if validation or the underlying seek fails
 */
@Override
public synchronized void seek(final long pos) throws IOException {
  validatePosition(pos);
  final long target = start + pos;
  underLyingStream.seek(target);
  // Commit only after the underlying stream has actually moved.
  position = target;
}
private void parseMetaData() throws IOException { Text line = new Text(); long read; FSDataInputStream in = null; LineReader lin = null; try { in = fs.open(masterIndexPath); FileStatus masterStat = fs.getFileStatus(masterIndexPath); masterIndexTimestamp = masterStat.getModificationTime(); lin = new LineReader(in, getConf()); read = lin.readLine(line); // the first line contains the version of the index file String versionLine = line.toString(); String[] arr = versionLine.split(" "); version = Integer.parseInt(arr[0]); // make it always backwards-compatible if (this.version > HarFileSystem.VERSION) { throw new IOException( "Invalid version " + this.version + " expected " + HarFileSystem.VERSION); } // each line contains a hashcode range and the index file name String[] readStr; while (read < masterStat.getLen()) { int b = lin.readLine(line); read += b; readStr = line.toString().split(" "); int startHash = Integer.parseInt(readStr[0]); int endHash = Integer.parseInt(readStr[1]); stores.add( new Store( Long.parseLong(readStr[2]), Long.parseLong(readStr[3]), startHash, endHash)); line.clear(); } } catch (IOException ioe) { LOG.warn("Encountered exception ", ioe); throw ioe; } finally { IOUtils.cleanup(LOG, lin, in); } FSDataInputStream aIn = fs.open(archiveIndexPath); try { FileStatus archiveStat = fs.getFileStatus(archiveIndexPath); archiveIndexTimestamp = archiveStat.getModificationTime(); LineReader aLin; // now start reading the real index file for (Store s : stores) { read = 0; aIn.seek(s.begin); aLin = new LineReader(aIn, getConf()); while (read + s.begin < s.end) { int tmp = aLin.readLine(line); read += tmp; String lineFeed = line.toString(); String[] parsed = lineFeed.split(" "); parsed[0] = decodeFileName(parsed[0]); archive.put(new Path(parsed[0]), new HarStatus(lineFeed)); line.clear(); } } } finally { IOUtils.cleanup(LOG, aIn); } }