public static ProcedureWALTrailer readTrailer(FSDataInputStream stream, long startPos, long size)
    throws IOException {
  // Beginning of the Trailer Jump. 17 = 1 byte version + 8 byte magic + 8 byte offset
  long trailerPos = size - 17;

  if (trailerPos < startPos) {
    throw new InvalidWALDataException("Missing trailer: size=" + size + " startPos=" + startPos);
  }

  stream.seek(trailerPos);
  int version = stream.read();
  if (version != TRAILER_VERSION) {
    throw new InvalidWALDataException(
        "Invalid Trailer version. got " + version + " expected " + TRAILER_VERSION);
  }

  long magic = StreamUtils.readLong(stream);
  if (magic != TRAILER_MAGIC) {
    throw new InvalidWALDataException(
        "Invalid Trailer magic. got " + magic + " expected " + TRAILER_MAGIC);
  }

  long trailerOffset = StreamUtils.readLong(stream);
  stream.seek(trailerOffset);

  ProcedureWALEntry entry = readEntry(stream);
  if (entry.getType() != ProcedureWALEntry.Type.PROCEDURE_WAL_EOF) {
    throw new InvalidWALDataException("Invalid Trailer begin");
  }

  ProcedureWALTrailer trailer = ProcedureWALTrailer.newBuilder()
      .setVersion(version)
      .setTrackerPos(stream.getPos())
      .build();
  return trailer;
}
/*
 * Read some data, skip a few bytes and read more. HADOOP-922.
 */
private void smallReadSeek(FileSystem fileSys, Path name) throws IOException {
  if (fileSys instanceof ChecksumFileSystem) {
    fileSys = ((ChecksumFileSystem) fileSys).getRawFileSystem();
  }
  // Make the buffer size small to trigger code for HADOOP-922
  FSDataInputStream stmRaw = fileSys.open(name, 1);
  byte[] expected = new byte[ONEMB];
  Random rand = new Random(seed);
  rand.nextBytes(expected);

  // Issue a simple read first.
  byte[] actual = new byte[128];
  stmRaw.seek(100000);
  stmRaw.read(actual, 0, actual.length);
  checkAndEraseData(actual, 100000, expected, "First Small Read Test");

  // now do a small seek of 4 bytes, within the same block.
  int newpos1 = 100000 + 128 + 4;
  stmRaw.seek(newpos1);
  stmRaw.read(actual, 0, actual.length);
  checkAndEraseData(actual, newpos1, expected, "Small Seek Bug 1");

  // seek another 256 bytes this time
  int newpos2 = newpos1 + 256;
  stmRaw.seek(newpos2);
  stmRaw.read(actual, 0, actual.length);
  checkAndEraseData(actual, newpos2, expected, "Small Seek Bug 2");

  // all done
  stmRaw.close();
}
// to be used for testing
public WikipediaRecordReader(URL fileURL, long start, long end) throws IOException {
  this.start = start;
  this.end = end;
  Path path = new Path("file://", fileURL.getPath());
  fsin = FileSystem.getLocal(new Configuration()).open(path);
  fsin.seek(start);
  fsin.seek(0);
}
public void readTracker(ProcedureStoreTracker tracker) throws IOException {
  ProcedureWALTrailer trailer = readTrailer();
  try {
    stream.seek(trailer.getTrackerPos());
    tracker.readFrom(stream);
  } finally {
    stream.seek(startPos);
  }
}
public ProcedureWALTrailer readTrailer() throws IOException {
  try {
    return ProcedureWALFormat.readTrailer(stream, startPos, logStatus.getLen());
  } finally {
    stream.seek(startPos);
  }
}
private InputStream OpenMultiplePartsWithOffset(FileSystem fs, Path pt, long offset)
    throws IOException {
  RemoteIterator<LocatedFileStatus> rit = fs.listFiles(pt, false);
  Vector<FSDataInputStream> fileHandleList = new Vector<FSDataInputStream>();
  while (rit.hasNext()) {
    Path path = rit.next().getPath();
    String filename = path.toString()
        .substring(path.getParent().toString().length(), path.toString().length());
    if (filename.startsWith("/part-")) {
      long filesize = fs.getFileStatus(path).getLen();
      if (offset < filesize) {
        FSDataInputStream handle = fs.open(path);
        if (offset > 0) {
          handle.seek(offset);
        }
        fileHandleList.add(handle);
      }
      offset -= filesize;
    }
  }
  if (fileHandleList.size() == 1) {
    return fileHandleList.get(0);
  } else if (fileHandleList.size() > 1) {
    Enumeration<FSDataInputStream> enu = fileHandleList.elements();
    return new SequenceInputStream(enu);
  } else {
    System.err.println("Error, no source file loaded. Run genSeedDataset.sh first!");
    return null;
  }
}
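A quick trace of the offset bookkeeping above, with hypothetical numbers: suppose there are three part files of 100 bytes each and offset = 150. The first file fails the offset < filesize check, so it is skipped and offset drops to 50; the second file passes the check, is opened, and seek(50) is issued, after which offset falls to -50; the third file also passes the check but fails offset > 0, so it is opened at position 0. The resulting SequenceInputStream therefore starts 150 bytes into the concatenated part files.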
public MutilCharRecordReader(FileSplit inputSplit, Configuration job) throws IOException {
  maxLineLength = job.getInt("mapred.mutilCharRecordReader.maxlength", Integer.MAX_VALUE);
  start = inputSplit.getStart();
  end = start + inputSplit.getLength();
  final Path file = inputSplit.getPath();
  compressionCodecs = new CompressionCodecFactory(job);
  final CompressionCodec codec = compressionCodecs.getCodec(file);
  // open the file system
  FileSystem fs = file.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(file);
  boolean skipFirstLine = false;
  if (codec != null) {
    lineReader = new LineReader(codec.createInputStream(fileIn), job);
    end = Long.MAX_VALUE;
  } else {
    if (start != 0) {
      skipFirstLine = true;
      --start;
      fileIn.seek(start);
    }
    lineReader = new LineReader(fileIn, job);
  }
  if (skipFirstLine) {
    start += lineReader.readLine(new Text(), 0,
        (int) Math.min((long) Integer.MAX_VALUE, end - start));
  }
  this.pos = start;
}
@Override
public ModelInput<StringBuilder> createInput(
    Class<? extends StringBuilder> dataType,
    FileSystem fileSystem,
    Path path,
    long offset,
    long fragmentSize,
    Counter counter) throws IOException, InterruptedException {
  FileSystem fs = FileSystem.get(path.toUri(), getConf());
  FSDataInputStream in = fs.open(path);
  boolean succeed = false;
  try {
    in.seek(offset);
    ModelInput<StringBuilder> result = format.createInput(
        dataType, path.toString(), new CountInputStream(in, counter), offset, fragmentSize);
    succeed = true;
    return result;
  } finally {
    if (succeed == false) {
      in.close();
    }
  }
}
/**
 * Sets {@link #mHdfsInputStream} to a stream from the under storage system, positioned at
 * {@code position}. {@link #mCurrentPosition} is not updated to the new position.
 *
 * @throws IOException if opening the file fails
 */
private void getHdfsInputStream(long position) throws IOException {
  if (mHdfsInputStream == null) {
    org.apache.hadoop.fs.FileSystem fs = mHdfsPath.getFileSystem(mHadoopConf);
    mHdfsInputStream = fs.open(mHdfsPath, mHadoopBufferSize);
  }
  mHdfsInputStream.seek(position);
}
public static void main(String[] args) throws IOException {
  String uri = args[0];
  Configuration configuration = new Configuration();
  System.out.println("Trying to get the file system object");
  URI uriObj = URI.create(uri);
  System.out.println("Got URI object " + uri);
  FileSystem fs = FileSystem.get(uriObj, configuration);
  FSDataInputStream fsDataInputStream = null;
  Path hdfsPath = new Path(uri);
  fsDataInputStream = fs.open(hdfsPath);
  // This specifies that reading starts from byte 0 of the file.
  fsDataInputStream.seek(0);
  IOUtils.copyBytes(fsDataInputStream, System.out, 4096, false);
  System.out.println("*******************************************");
  BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(hdfsPath)));
  try {
    String line;
    line = br.readLine();
    while (line != null) {
      System.out.println("################ Line is###### " + line);
      // be sure to read the next line otherwise you'll get an infinite loop
      line = br.readLine();
    }
  } finally {
    // you should close out the BufferedReader
    br.close();
  }
}
private void init(Counters.Counter readsCounter) throws IOException {
  if (reader == null) {
    FSDataInputStream in = fs.open(file);
    in.seek(segmentOffset);
    reader = new Reader<K, V>(conf, in, segmentLength, codec, readsCounter);
  }
}
@Override
public int read(byte b[], int off, int len) throws IOException {
  if (mTachyonFileInputStream != null) {
    int ret = 0;
    try {
      ret = mTachyonFileInputStream.read(b, off, len);
      mCurrentPosition += ret;
      return ret;
    } catch (IOException e) {
      LOG.error(e.getMessage(), e);
      mTachyonFileInputStream = null;
    }
  }
  if (mHdfsInputStream != null) {
    b[off] = (byte) readFromHdfsBuffer();
    if (b[off] == -1) {
      return -1;
    }
    return 1;
  }
  FileSystem fs = mHdfsPath.getFileSystem(mHadoopConf);
  mHdfsInputStream = fs.open(mHdfsPath, mHadoopBufferSize);
  mHdfsInputStream.seek(mCurrentPosition);
  b[off] = (byte) readFromHdfsBuffer();
  if (b[off] == -1) {
    return -1;
  }
  return 1;
}
@Test(timeout = 120000)
public void testSeekAfterSetDropBehind() throws Exception {
  // start a cluster
  LOG.info("testSeekAfterSetDropBehind");
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  String TEST_PATH = "/test";
  int TEST_PATH_LEN = MAX_TEST_FILE_LEN;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    createHdfsFile(fs, new Path(TEST_PATH), TEST_PATH_LEN, false);
    // verify that we can seek after setDropBehind
    FSDataInputStream fis = fs.open(new Path(TEST_PATH));
    try {
      Assert.assertTrue(fis.read() != -1); // create BlockReader
      fis.setDropBehind(false);            // clear BlockReader
      fis.seek(2);                         // seek
    } finally {
      fis.close();
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  m_Sb.setLength(0);
  m_Start = split.getStart();
  m_End = m_Start + split.getLength();
  final Path file = split.getPath();
  compressionCodecs = new CompressionCodecFactory(job);
  final CompressionCodec codec = compressionCodecs.getCodec(file);
  // open the file and seek to the m_Start of the split
  FileSystem fs = file.getFileSystem(job);
  // getFileStatus fileStatus = fs.getFileStatus(split.getPath());
  //noinspection deprecation
  @SuppressWarnings(value = "deprecated")
  long length = fs.getLength(file);
  FSDataInputStream fileIn = fs.open(split.getPath());
  if (m_Start > 0) {
    fileIn.seek(m_Start);
  }
  if (codec != null) {
    CompressionInputStream inputStream = codec.createInputStream(fileIn);
    m_Input = new BufferedReader(new InputStreamReader(inputStream));
    m_End = length;
  } else {
    m_Input = new BufferedReader(new InputStreamReader(fileIn));
  }
  m_Current = m_Start;
  m_Key = split.getPath().getName();
}
/*
 * Position the input stream at the start of the first record.
 */
private void positionAtFirstRecord(FSDataInputStream stream) throws IOException {
  if (start > 0) {
    // Advance to the start of the first line in our slice.
    // We use a temporary LineReader to read a partial line and find the
    // start of the first one on or after our starting position.
    // In case our slice starts right at the beginning of a line, we need to back
    // up by one position and then discard the first line.
    start -= 1;
    stream.seek(start);
    LineReader reader = new LineReader(stream);
    int bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start));
    start = start + bytesRead;
    stream.seek(start);
  }
  // else: if start == 0 we're starting at the beginning of a line
  pos = start;
}
public WikipediaRecordReader(FileSplit split, JobConf jobConf) throws IOException {
  // open the file and seek to the start of the split
  start = split.getStart();
  end = start + split.getLength();
  Path file = split.getPath();
  FileSystem fs = file.getFileSystem(jobConf);
  fsin = fs.open(split.getPath());
  fsin.seek(start);
}
@Override
public boolean nextKeyValue() throws IOException {
  int uncompressedBlockSize = rawInputStream.readInt();
  if (uncompressedBlockSize == 0) {
    // An uncompressed block size of zero means end of file.
    return false;
  } else if (uncompressedBlockSize < 0) {
    throw new EOFException("Could not read uncompressed block size at position "
        + rawInputStream.getPos() + " in file " + lzoFile);
  }
  int compressedBlockSize = rawInputStream.readInt();
  if (compressedBlockSize <= 0) {
    throw new EOFException("Could not read compressed block size at position "
        + rawInputStream.getPos() + " in file " + lzoFile);
  }

  // See LzopInputStream.getCompressedData
  boolean isUncompressedBlock = (uncompressedBlockSize == compressedBlockSize);
  int numChecksumsToSkip = isUncompressedBlock
      ? numDecompressedChecksums
      : numDecompressedChecksums + numCompressedChecksums;

  // Get the current position. Since we've read two ints, the current block started 8 bytes ago.
  long pos = rawInputStream.getPos();
  curValue.set(pos - 8, uncompressedOffset, uncompressedBlockSize);
  uncompressedOffset += uncompressedBlockSize;

  // Seek beyond the checksums and beyond the block data to the beginning of the next block.
  rawInputStream.seek(pos + compressedBlockSize + (4 * numChecksumsToSkip));
  ++numBlocksRead;

  // Log some progress every so often.
  if (numBlocksRead % LOG_EVERY_N_BLOCKS == 0) {
    LOG.info("Reading block " + numBlocksRead + " at pos " + pos + " of " + totalFileSize
        + ". Read is " + (100.0 * getProgress()) + "% done. ");
  }
  return true;
}
public WikipediaRecordReader(FileSplit split, TaskAttemptContext context) throws IOException {
  // open the file and seek to the start of the split
  start = split.getStart();
  end = start + split.getLength();
  Path file = split.getPath();
  FileSystem fs = file.getFileSystem(context.getConfiguration());
  fsin = fs.open(file);
  fsin.seek(start);
}
@Override
public void initialize(InputSplit spl, TaskAttemptContext ctx) throws IOException {
  // This method should only be called once (see Hadoop API). However,
  // there seems to be disagreement between implementations that call
  // initialize() and Hadoop-BAM's own code that relies on
  // {@link BAMInputFormat} to call initialize() when the reader is
  // created. Therefore we add this check for the time being.
  if (isInitialized) {
    close();
  }
  isInitialized = true;

  final Configuration conf = ctx.getConfiguration();
  final FileVirtualSplit split = (FileVirtualSplit) spl;
  final Path file = split.getPath();
  final FileSystem fs = file.getFileSystem(conf);

  this.stringency = SAMHeaderReader.getValidationStringency(conf);

  final FSDataInputStream in = fs.open(file);
  final SAMFileHeader header = SAMHeaderReader.readSAMHeaderFrom(in, conf);
  codec = new BAMRecordCodec(header);

  in.seek(0);
  bci = new BlockCompressedInputStream(
      new WrapSeekable<FSDataInputStream>(in, fs.getFileStatus(file).getLen(), file));

  virtualStart = split.getStartVirtualOffset();
  fileStart = virtualStart >>> 16;
  virtualEnd = split.getEndVirtualOffset();

  bci.seek(virtualStart);
  codec.setInputStream(bci);

  if (BAMInputFormat.DEBUG_BAM_SPLITTER) {
    final long recordStart = virtualStart & 0xffff;
    System.err.println("XXX initialized BAMRecordReader byte offset: " + fileStart
        + " record offset: " + recordStart);
  }

  keepReadPairsTogether = SortOrder.queryname.equals(header.getSortOrder())
      && conf.getBoolean(BAMInputFormat.KEEP_PAIRED_READS_TOGETHER_PROPERTY, false);
  readPair = false;
  lastOfPair = false;
  intervals = BAMInputFormat.getIntervals(conf);
  if (intervals != null) {
    overlapDetector = new OverlapDetector<>(0, 0);
    overlapDetector.addAll(intervals, intervals);
  }
}
/**
 * Constructor to map a file containing Succinct data structures via streams.
 *
 * @param filePath Path of the file.
 * @param conf Configuration for the filesystem.
 * @throws IOException
 */
public SuccinctIndexedFileStream(Path filePath, Configuration conf) throws IOException {
  super(filePath, conf);
  FSDataInputStream is = getStream(filePath);
  is.seek(endOfCoreStream);
  int len = is.readInt();
  offsets = new int[len];
  for (int i = 0; i < len; i++) {
    offsets[i] = is.readInt();
  }
}
/** Tests seek(). */
@Test
public void testSeek() throws IOException {
  final Path testFile = new Path("/testfile+1");
  FSDataOutputStream out = hdfs.create(testFile, true);
  out.writeBytes("0123456789");
  out.close();

  FSDataInputStream in = hftpFs.open(testFile);
  in.seek(7);
  assertEquals('7', in.read());
}
@Test(timeout = 10000)
public void testSeek() throws IOException {
  Path path = path("/tests3a/testfile.seek");
  writeFile(path, TEST_BUFFER_SIZE * 10);

  FSDataInputStream inputStream = fs.open(path, TEST_BUFFER_SIZE);
  inputStream.seek(inputStream.getPos() + MODULUS);
  testReceivedData(inputStream, TEST_BUFFER_SIZE * 10 - MODULUS);
}
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  Path p = ((FileSplit) split).getPath();
  FileSystem fs = p.getFileSystem(context.getConfiguration());
  in = fs.open(p);
  long start = ((FileSplit) split).getStart();
  // find the offset to start at a record boundary
  offset = (RECORD_LENGTH - (start % RECORD_LENGTH)) % RECORD_LENGTH;
  in.seek(start + offset);
  length = ((FileSplit) split).getLength();
}
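A quick check of the boundary arithmetic above, with hypothetical numbers: if RECORD_LENGTH is 100 and the split starts at byte 250, then offset = (100 - (250 % 100)) % 100 = 50, so the reader seeks to byte 300, the next record boundary. If the split already starts on a boundary, the outer modulo keeps offset at 0 rather than skipping a whole record.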
public void open() throws IOException {
  if (stream == null) {
    stream = fs.open(logFile);
  }
  if (header == null) {
    header = ProcedureWALFormat.readHeader(stream);
    startPos = stream.getPos();
  } else {
    stream.seek(startPos);
  }
}
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  // Initialization.
  // System.out.println("initialize");
  // Logger.getLogger("KirchhoffMigration").log(Level.INFO, "enter initialize()");
  FileSplit inputsplit = (FileSplit) split;
  Configuration conf = context.getConfiguration();
  LENGTH = conf.getLong("FileSplitLength", 0);
  SPILL = LENGTH / conf.getInt("SplitPerMap", 1);
  // LENGTH = 8;
  // SPILL = 8;
  assert (LENGTH >= SPILL);
  // System.out.println("length:" + LENGTH);
  // System.out.println("spill:" + SPILL);
  String filename = inputsplit.getPath().toString();
  // System.out.println("filename:" + filename);
  // String buf = filename.substring(filename.lastIndexOf("fcxy") + 4, filename.lastIndexOf("."));
  // int count = Integer.parseInt(buf);
  // System.out.println(filename);
  // start = inputsplit.getStart(); // start position of this split
  start = inputsplit.getStart();
  shotNum += start * 8 / Float.SIZE;
  long offset = LENGTH >= inputsplit.getLength() ? inputsplit.getLength() : LENGTH;
  end = start + offset; // end position of this split
  // System.out.println("inputSplitLength:" + split.getLength());
  // System.out.println("end:" + end);
  // start = inputsplit.getStart(); // start position of this split
  // end = start + inputsplit.getLength(); // end position of this split
  // System.out.println("start:" + start + " ,end:" + end);
  final Path file = inputsplit.getPath();
  // System.out.println(file.toString());
  // Open the file.
  FileSystem fs = file.getFileSystem(context.getConfiguration());
  fileIn = fs.open(inputsplit.getPath());
  // Key step 2: move the file pointer to this split, because a freshly opened
  // file always starts with its pointer at the beginning.
  fileIn.seek(start);
  // in = new LineReader(fileIn, context.getConfiguration());
  // if (start != 0) {
  //   System.out.println("not the first split");
  //   // Key fix 1: if this is not the first split, then (supposing the first split
  //   // covers bytes 0-4) byte 4 has already been read and must be skipped here;
  //   // otherwise we would go back and re-read data a previous split already consumed.
  //   start += (end - pos + 1);
  // }
  pos = start;
}
public XmlRecordReader(FileSplit split, TaskAttemptContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  startTag = conf.get(START_TAG_KEY).getBytes("utf-8");
  endTag = conf.get(END_TAG_KEY).getBytes("utf-8");
  // open the file and seek to the start of the split
  start = split.getStart();
  end = start + split.getLength();
  Path file = split.getPath();
  FileSystem fs = file.getFileSystem(conf);
  fsin = fs.open(split.getPath());
  fsin.seek(start);
}
@Override
public void seek(long pos) throws IOException {
  try {
    in.seek(pos);
  } catch (FileNotFoundException e) {
    tryOpen().seek(pos);
  } catch (NullPointerException e) {
    // HDFS 1.x - DFSInputStream.getBlockAt()
    tryOpen().seek(pos);
  } catch (AssertionError e) {
    // assert in HDFS 1.x - DFSInputStream.getBlockAt()
    tryOpen().seek(pos);
  }
  this.pos = pos;
}
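tryOpen() is not shown in this snippet; a minimal sketch of what such a recovery helper might look like, assuming (hypothetically) that the wrapper also keeps the FileSystem and Path it was opened from as fields:

private FSDataInputStream tryOpen() throws IOException {
  // Hypothetical recovery helper: reopen the underlying file and return the fresh
  // stream; the caller in seek() above re-issues the seek that originally failed.
  in = fs.open(file);
  return in;
}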
public static void main(String[] args) throws Exception {
  String uri = args[0];
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(URI.create(uri), conf);
  FSDataInputStream in = null;
  try {
    in = fs.open(new Path(uri));
    IOUtils.copyBytes(in, System.out, 4096, false);
    in.seek(3); // go back to pos 3 of the file
    IOUtils.copyBytes(in, System.out, 4096, false);
  } finally {
    IOUtils.closeStream(in);
  }
}
/**
 * From Design Pattern, O'Reilly... This method takes as arguments the map task's assigned
 * InputSplit and TaskAttemptContext, and prepares the record reader. For file-based input
 * formats, this is a good place to seek to the byte position in the file from which to begin
 * reading.
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
  // This InputSplit is a FileInputSplit
  FileSplit split = (FileSplit) genericSplit;

  // Retrieve configuration, and max allowed
  // bytes for a single record
  Configuration job = context.getConfiguration();
  this.delimiterRegex = "^.*<REUTERS.*$";
  delimiterPattern = Pattern.compile(delimiterRegex);

  // Split "S" is responsible for all records
  // between the "start" and "end" positions
  start = split.getStart();
  end = start + split.getLength();

  // Retrieve the file containing split "S"
  final Path file = split.getPath();
  FileSystem fs = file.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(split.getPath());

  // If split "S" starts at byte 0, the first line will be processed.
  // If split "S" does not start at byte 0, the first line has already been
  // processed by "S-1" and therefore needs to be silently ignored.
  boolean skipFirstLine = false;
  if (start != 0) {
    skipFirstLine = true;
    // Set the file pointer at the "start - 1" position.
    // This is to make sure we won't miss any line;
    // it could happen if "start" is located on an EOL.
    --start;
    fileIn.seek(start);
  }
  in = new LineReader(fileIn, job);

  // If the first line needs to be skipped, read it
  // and store its content in a dummy Text
  if (skipFirstLine) {
    Text dummy = new Text();
    // Reset "start" to "start + line offset"
    start += in.readLine(dummy, 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
  }

  // Position is the actual start
  this.pos = start;
}
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  startTag = conf.get(START_TAG_KEY).getBytes("utf-8");
  endTag = conf.get(END_TAG_KEY).getBytes("utf-8");
  FileSplit fileSplit = (FileSplit) split;
  // open the file and seek to the start of the split
  start = fileSplit.getStart();
  end = start + fileSplit.getLength();
  Path file = fileSplit.getPath();
  FileSystem fs = file.getFileSystem(conf);
  fsin = fs.open(fileSplit.getPath());
  fsin.seek(start);
}