@Override protected int readChunk(long pos, byte[] buf, int offset, int len, byte[] checksum) throws IOException { boolean eof = false; if (needChecksum()) { try { long checksumPos = getChecksumFilePos(pos); if (checksumPos != sums.getPos()) { sums.seek(checksumPos); } sums.readFully(checksum); } catch (EOFException e) { eof = true; } len = bytesPerSum; } if (pos != datas.getPos()) { datas.seek(pos); } int nread = readFully(datas, buf, offset, len); if (eof && nread > 0) { throw new ChecksumException("Checksum error: " + file + " at " + pos, pos); } return nread; }
@Override protected int readChunk(long pos, byte[] buf, int offset, int len, byte[] checksum) throws IOException { boolean eof = false; if (needChecksum()) { assert checksum != null; // we have a checksum buffer assert checksum.length % CHECKSUM_SIZE == 0; // it is sane length assert len >= bytesPerSum; // we must read at least one chunk final int checksumsToRead = Math.min( len / bytesPerSum, // number of checksums based on len to read checksum.length / CHECKSUM_SIZE); // size of checksum buffer long checksumPos = getChecksumFilePos(pos); if (checksumPos != sums.getPos()) { sums.seek(checksumPos); } int sumLenRead = sums.read(checksum, 0, CHECKSUM_SIZE * checksumsToRead); if (sumLenRead >= 0 && sumLenRead % CHECKSUM_SIZE != 0) { throw new ChecksumException( "Checksum file not a length multiple of checksum size " + "in " + file + " at " + pos + " checksumpos: " + checksumPos + " sumLenread: " + sumLenRead, pos); } if (sumLenRead <= 0) { // we're at the end of the file eof = true; } else { // Adjust amount of data to read based on how many checksum chunks we read len = Math.min(len, bytesPerSum * (sumLenRead / CHECKSUM_SIZE)); } } if (pos != datas.getPos()) { datas.seek(pos); } int nread = readFully(datas, buf, offset, len); if (eof && nread > 0) { throw new ChecksumException("Checksum error: " + file + " at " + pos, pos); } return nread; }
private InputStream OpenMultiplePartsWithOffset(FileSystem fs, Path pt, long offset) throws IOException { RemoteIterator<LocatedFileStatus> rit = fs.listFiles(pt, false); Vector<FSDataInputStream> fileHandleList = new Vector<FSDataInputStream>(); while (rit.hasNext()) { Path path = rit.next().getPath(); String filename = path.toString().substring(path.getParent().toString().length(), path.toString().length()); if (filename.startsWith("/part-")) { long filesize = fs.getFileStatus(path).getLen(); if (offset < filesize) { FSDataInputStream handle = fs.open(path); if (offset > 0) { handle.seek(offset); } fileHandleList.add(handle); } offset -= filesize; } } if (fileHandleList.size() == 1) return fileHandleList.get(0); else if (fileHandleList.size() > 1) { Enumeration<FSDataInputStream> enu = fileHandleList.elements(); return new SequenceInputStream(enu); } else { System.err.println("Error, no source file loaded. run genSeedDataset.sh fisrt!"); return null; } }
/** * Seek to the given position in the stream. The next read() will be from that position. * * <p>This method does not allow seek past the end of the file. This produces IOException. * * @param pos the postion to seek to. * @exception IOException if an I/O error occurs or seeks after EOF ChecksumException if the * chunk to seek to is corrupted */ @Override public synchronized void seek(long pos) throws IOException { if (pos > getFileLength()) { throw new IOException("Cannot seek after EOF"); } super.seek(pos); }
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); m_Sb.setLength(0); m_Start = split.getStart(); m_End = m_Start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the m_Start of the split FileSystem fs = file.getFileSystem(job); // getFileStatus fileStatus = fs.getFileStatus(split.getPath()); //noinspection deprecation @SuppressWarnings(value = "deprecated") long length = fs.getLength(file); FSDataInputStream fileIn = fs.open(split.getPath()); if (m_Start > 0) fileIn.seek(m_Start); if (codec != null) { CompressionInputStream inputStream = codec.createInputStream(fileIn); m_Input = new BufferedReader(new InputStreamReader(inputStream)); m_End = length; } else { m_Input = new BufferedReader(new InputStreamReader(fileIn)); } m_Current = m_Start; m_Key = split.getPath().getName(); }
public BufferedReader loadDataFromFile(String filepath, long offset) { try { Path pt = new Path(filepath); FileSystem fs = FileSystem.get(fsConf); InputStreamReader isr; if (fs.isDirectory(pt)) { // multiple parts isr = new InputStreamReader(OpenMultiplePartsWithOffset(fs, pt, offset)); } else { // single file FSDataInputStream fileHandler = fs.open(pt); if (offset > 0) fileHandler.seek(offset); isr = new InputStreamReader(fileHandler); } BufferedReader reader = new BufferedReader(isr); if (offset > 0) reader.readLine(); // skip first line in case of seek return reader; } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } assert false : "Should not reach here!"; return null; }
@Override protected InputStream open(long offset) throws IOException { FSDataInputStream is = _fs.open(_path); is.seek(offset); return is; }