public static void downloadHdfs(String srcfilePath, String destFilePath) {
  try {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(srcfilePath), conf);
    FSDataInputStream hdfsInStream = fs.open(new Path(srcfilePath));

    File dstFile = new File(destFilePath);
    if (!dstFile.getParentFile().exists()) {
      dstFile.getParentFile().mkdirs();
    }

    OutputStream out = new FileOutputStream(destFilePath);
    byte[] ioBuffer = new byte[1024];
    int readLen = hdfsInStream.read(ioBuffer);
    while (-1 != readLen) {
      out.write(ioBuffer, 0, readLen);
      readLen = hdfsInStream.read(ioBuffer);
    }

    out.close();
    hdfsInStream.close();
    fs.close();
  } catch (FileNotFoundException e) {
    LOG.error("[downloadHdfs]", e);
  } catch (IOException e) {
    LOG.error("[downloadHdfs]", e);
  }
}
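// A minimal alternative sketch (not from the original snippet): the same HDFS-to-local copy
// using try-with-resources and Hadoop's org.apache.hadoop.io.IOUtils.copyBytes(), so both
// streams are closed even if the copy fails part-way. The method name and buffer size here are
// illustrative assumptions, not part of the code above.
public static void downloadHdfsWithCopyBytes(String srcFilePath, String destFilePath)
    throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(URI.create(srcFilePath), conf);
  File dstFile = new File(destFilePath);
  if (!dstFile.getParentFile().exists()) {
    dstFile.getParentFile().mkdirs();
  }
  try (FSDataInputStream in = fs.open(new Path(srcFilePath));
      OutputStream out = new FileOutputStream(dstFile)) {
    // Copy in 4 KB chunks; 'false' leaves closing to the try-with-resources block.
    IOUtils.copyBytes(in, out, 4096, false);
  }
}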
/** Tests getPos() functionality. */
@Test
public void testGetPos() throws IOException {
  final Path testFile = new Path("/testfile+1");
  // Write a test file.
  FSDataOutputStream out = hdfs.create(testFile, true);
  out.writeBytes("0123456789");
  out.close();

  FSDataInputStream in = hftpFs.open(testFile);

  // Test read().
  for (int i = 0; i < 5; ++i) {
    assertEquals(i, in.getPos());
    in.read();
  }

  // Test read(b, off, len).
  assertEquals(5, in.getPos());
  byte[] buffer = new byte[10];
  assertEquals(2, in.read(buffer, 0, 2));
  assertEquals(7, in.getPos());

  // Test read(b).
  int bytesRead = in.read(buffer);
  assertEquals(7 + bytesRead, in.getPos());

  // Test EOF.
  for (int i = 0; i < 100; ++i) {
    in.read();
  }
  assertEquals(10, in.getPos());
  in.close();
}
/*
 * Read some data, skip a few bytes and read more. HADOOP-922.
 */
private void smallReadSeek(FileSystem fileSys, Path name) throws IOException {
  if (fileSys instanceof ChecksumFileSystem) {
    fileSys = ((ChecksumFileSystem) fileSys).getRawFileSystem();
  }
  // Make the buffer size small to trigger code for HADOOP-922
  FSDataInputStream stmRaw = fileSys.open(name, 1);
  byte[] expected = new byte[ONEMB];
  Random rand = new Random(seed);
  rand.nextBytes(expected);

  // Issue a simple read first.
  byte[] actual = new byte[128];
  stmRaw.seek(100000);
  stmRaw.read(actual, 0, actual.length);
  checkAndEraseData(actual, 100000, expected, "First Small Read Test");

  // now do a small seek of 4 bytes, within the same block.
  int newpos1 = 100000 + 128 + 4;
  stmRaw.seek(newpos1);
  stmRaw.read(actual, 0, actual.length);
  checkAndEraseData(actual, newpos1, expected, "Small Seek Bug 1");

  // seek another 256 bytes this time
  int newpos2 = newpos1 + 256;
  stmRaw.seek(newpos2);
  stmRaw.read(actual, 0, actual.length);
  checkAndEraseData(actual, newpos2, expected, "Small Seek Bug 2");

  // all done
  stmRaw.close();
}
static long readHdfsFile(FileSystem fs, Path p, long length, Boolean dropBehind)
    throws Exception {
  FSDataInputStream fis = null;
  long totalRead = 0;
  try {
    fis = fs.open(p);
    if (dropBehind != null) {
      fis.setDropBehind(dropBehind);
    }
    byte buf[] = new byte[8196];
    while (length > 0) {
      int amt = (length > buf.length) ? buf.length : (int) length;
      int ret = fis.read(buf, 0, amt);
      if (ret == -1) {
        return totalRead;
      }
      totalRead += ret;
      length -= ret;
    }
    // Without this return, a fully successful read would fall through to the
    // "unreachable" throw below.
    return totalRead;
  } catch (IOException e) {
    LOG.error("ioexception", e);
  } finally {
    if (fis != null) {
      fis.close();
    }
  }
  throw new RuntimeException("unreachable");
}
@Test(timeout = 120000)
public void testSeekAfterSetDropBehind() throws Exception {
  // start a cluster
  LOG.info("testSeekAfterSetDropBehind");
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  String TEST_PATH = "/test";
  int TEST_PATH_LEN = MAX_TEST_FILE_LEN;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    createHdfsFile(fs, new Path(TEST_PATH), TEST_PATH_LEN, false);
    // verify that we can seek after setDropBehind
    FSDataInputStream fis = fs.open(new Path(TEST_PATH));
    try {
      Assert.assertTrue(fis.read() != -1); // create BlockReader
      fis.setDropBehind(false);            // clear BlockReader
      fis.seek(2);                         // seek
    } finally {
      fis.close();
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
public static ProcedureWALTrailer readTrailer(FSDataInputStream stream, long startPos, long size)
    throws IOException {
  // Beginning of the Trailer Jump. 17 = 1 byte version + 8 byte magic + 8 byte offset
  long trailerPos = size - 17;

  if (trailerPos < startPos) {
    throw new InvalidWALDataException("Missing trailer: size=" + size + " startPos=" + startPos);
  }

  stream.seek(trailerPos);
  int version = stream.read();
  if (version != TRAILER_VERSION) {
    throw new InvalidWALDataException(
        "Invalid Trailer version. got " + version + " expected " + TRAILER_VERSION);
  }

  long magic = StreamUtils.readLong(stream);
  if (magic != TRAILER_MAGIC) {
    throw new InvalidWALDataException(
        "Invalid Trailer magic. got " + magic + " expected " + TRAILER_MAGIC);
  }

  long trailerOffset = StreamUtils.readLong(stream);
  stream.seek(trailerOffset);

  ProcedureWALEntry entry = readEntry(stream);
  if (entry.getType() != ProcedureWALEntry.Type.PROCEDURE_WAL_EOF) {
    throw new InvalidWALDataException("Invalid Trailer begin");
  }

  ProcedureWALTrailer trailer = ProcedureWALTrailer.newBuilder()
      .setVersion(version)
      .setTrackerPos(stream.getPos())
      .build();
  return trailer;
}
private void readData(
    String path, int length, int start, int SIZE, float[] datas, FileSystem fs) {
  try {
    FSDataInputStream fileIn = fs.open(new Path(path));
    byte[] datafully = new byte[datas.length * 4];
    // Positional read of the whole region; readFully guarantees the buffer is filled,
    // whereas read(position, ...) may return fewer bytes than requested.
    fileIn.readFully(length * start * SIZE, datafully, 0, datafully.length);
    fileIn.close();
    fileIn = null;
    for (int i = 0; i < datas.length; i++) {
      datas[i] = Float.intBitsToFloat(getInt(datafully, i * 4));
    }
    datafully = null;
    System.gc();
  } catch (Exception e) {
    e.printStackTrace();
  }
}
public void readFile(String file) throws IOException {
  Configuration conf = new Configuration();
  conf.addResource(new Path("/opt/hadoop-0.20.0/conf/core-site.xml"));

  FileSystem fileSystem = FileSystem.get(conf);

  Path path = new Path(file);
  if (!fileSystem.exists(path)) {
    System.out.println("File " + file + " does not exist");
    return;
  }

  FSDataInputStream in = fileSystem.open(path);

  String filename = file.substring(file.lastIndexOf('/') + 1, file.length());

  OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(filename)));

  byte[] b = new byte[1024];
  int numBytes = 0;
  while ((numBytes = in.read(b)) > 0) {
    out.write(b, 0, numBytes);
  }

  in.close();
  out.close();
  fileSystem.close();
}
public boolean nextKeyValue() throws IOException {
  if (offset >= length) {
    return false;
  }
  int read = 0;
  while (read < RECORD_LENGTH) {
    int newRead = in.read(buffer, read, RECORD_LENGTH - read);
    if (newRead == -1) {
      if (read == 0) {
        return false;
      } else {
        throw new EOFException("read past eof");
      }
    }
    read += newRead;
  }
  if (key == null) {
    key = new Text();
  }
  if (value == null) {
    value = new Text();
  }
  key.set(buffer, 0, KEY_LENGTH);
  value.set(buffer, KEY_LENGTH, VALUE_LENGTH);
  offset += RECORD_LENGTH;
  return true;
}
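// A hedged alternative sketch (not from the original record reader): when a full record is known
// to be present, Hadoop's org.apache.hadoop.io.IOUtils.readFully() can replace the manual
// short-read loop above. The fields (in, buffer, offset, RECORD_LENGTH, KEY_LENGTH, VALUE_LENGTH)
// are assumed to match the snippet above; note readFully() throws on a premature EOF instead of
// returning false.
private void readOneRecord() throws IOException {
  // Keeps reading until RECORD_LENGTH bytes have been copied into buffer, or throws.
  IOUtils.readFully(in, buffer, 0, RECORD_LENGTH);
  key.set(buffer, 0, KEY_LENGTH);
  value.set(buffer, KEY_LENGTH, VALUE_LENGTH);
  offset += RECORD_LENGTH;
}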
private static boolean checkFiles(
    FileSystem fs, String topdir, MyFile[] files, boolean existingOnly) throws IOException {
  Path root = new Path(topdir);

  for (int idx = 0; idx < files.length; idx++) {
    Path fPath = new Path(root, files[idx].getName());
    try {
      fs.getFileStatus(fPath);
      FSDataInputStream in = fs.open(fPath);
      byte[] toRead = new byte[files[idx].getSize()];
      byte[] toCompare = new byte[files[idx].getSize()];
      Random rb = new Random(files[idx].getSeed());
      rb.nextBytes(toCompare);
      assertEquals("Cannot read file.", toRead.length, in.read(toRead));
      in.close();
      for (int i = 0; i < toRead.length; i++) {
        if (toRead[i] != toCompare[i]) {
          return false;
        }
      }
      toRead = null;
      toCompare = null;
    } catch (FileNotFoundException fnfe) {
      if (!existingOnly) {
        throw fnfe;
      }
    }
  }

  return true;
}
private boolean readUntilMatch(byte[] match, boolean withinBlock) throws IOException {
  int i = 0;
  while (true) {
    int b = fsin.read();
    // end of file:
    if (b == -1) {
      return false;
    }
    // save to buffer:
    if (withinBlock) {
      buffer.write(b);
    }
    // check if we're matching:
    if (b == match[i]) {
      i++;
      if (i >= match.length) {
        return true;
      }
    } else {
      i = 0;
    }
    // see if we've passed the stop point:
    if (!withinBlock && i == 0 && fsin.getPos() >= end) {
      return false;
    }
  }
}
/*
 * Fetch a file that is in a Hadoop file system. Return a local File.
 * Interruptible.
 */
private File hdfsFetch(Path fromPath, Reporter reporter) throws IOException, InterruptedException {
  UUID uniqueId = UUID.randomUUID();
  File toFile = new File(tempDir, uniqueId.toString() + "/" + fromPath.getName());
  File toDir = new File(toFile.getParent());
  if (toDir.exists()) {
    FileUtils.deleteDirectory(toDir);
  }
  toDir.mkdirs();
  Path toPath = new Path(toFile.getCanonicalPath());

  FileSystem fS = fromPath.getFileSystem(hadoopConf);
  FileSystem tofS = FileSystem.getLocal(hadoopConf);

  Throttler throttler = new Throttler((double) bytesPerSecThrottle);
  try {
    for (FileStatus fStatus : fS.globStatus(fromPath)) {
      log.info("Copying " + fStatus.getPath() + " to " + toPath);
      long bytesSoFar = 0;

      FSDataInputStream iS = fS.open(fStatus.getPath());
      FSDataOutputStream oS = tofS.create(toPath);

      byte[] buffer = new byte[downloadBufferSize];
      int nRead;
      while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
        // Needed to be able to be interrupted at any moment.
        if (Thread.interrupted()) {
          iS.close();
          oS.close();
          cleanDirNoExceptions(toDir);
          throw new InterruptedException();
        }
        bytesSoFar += nRead;
        oS.write(buffer, 0, nRead);
        throttler.incrementAndThrottle(nRead);
        if (bytesSoFar >= bytesToReportProgress) {
          reporter.progress(bytesSoFar);
          bytesSoFar = 0L;
        }
      }
      if (reporter != null) {
        reporter.progress(bytesSoFar);
      }
      oS.close();
      iS.close();
    }
    return toDir;
  } catch (ClosedByInterruptException e) {
    // This can be thrown by the read method.
    cleanDirNoExceptions(toDir);
    throw new InterruptedIOException();
  }
}
/**
 * Load from the filesystem
 *
 * @param fs filesystem
 * @param path path
 * @return a loaded CD
 * @throws IOException IO problems
 */
public static ClusterDescription load(FileSystem fs, Path path)
    throws IOException, JsonParseException, JsonMappingException {
  FileStatus status = fs.getFileStatus(path);
  byte[] b = new byte[(int) status.getLen()];
  FSDataInputStream dataInputStream = fs.open(path);
  try {
    // A single read() may return fewer bytes than the file length; readFully loops
    // until the whole file is in the buffer.
    dataInputStream.readFully(0, b);
  } finally {
    dataInputStream.close();
  }
  String json = new String(b, 0, b.length, UTF_8);
  return fromJson(json);
}
// read a file using fetchBlockByteRange()
private boolean checkFile2(FSDataInputStream in, byte[] expected) {
  byte[] toRead = new byte[expected.length];
  try {
    assertEquals("Cannot read file", toRead.length, in.read(0, toRead, 0, toRead.length));
  } catch (IOException e) {
    return false;
  }
  return checkFile(toRead, expected);
}
private String retrieveLineSeparator(FSDataInputStream fis) throws IOException {
  char current;
  String lineSeparator = "";
  while (fis.available() > 0) {
    current = (char) fis.read();
    if ((current == '\n') || (current == '\r')) {
      lineSeparator += current;
      if (fis.available() > 0) {
        char next = (char) fis.read();
        if ((next == '\r') || (next == '\n')) {
          lineSeparator += next;
        }
      }
      return lineSeparator;
    }
  }
  return null;
}
@Override
public int readByteBuffer(FSDataInputStream file, ByteBuffer dest) throws IOException {
  int pos = dest.position();
  int result = file.read(dest);
  if (result > 0) {
    // Ensure this explicitly, since read() in versions before Hadoop 2.7 doesn't do it.
    dest.position(pos + result);
  }
  return result;
}
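// A hedged usage sketch (not from the original source): filling a ByteBuffer completely by
// calling the shim method above in a loop, since a single read(ByteBuffer) call may return fewer
// bytes than remain in the buffer. readByteBuffer() refers to the method above; the helper name
// and return convention here are illustrative assumptions.
int readFullyIntoBuffer(FSDataInputStream file, ByteBuffer dest) throws IOException {
  int total = 0;
  while (dest.hasRemaining()) {
    int n = readByteBuffer(file, dest);
    if (n < 0) {
      break; // EOF before the buffer was filled
    }
    total += n;
  }
  return total;
}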
/** Tests seek(). */
@Test
public void testSeek() throws IOException {
  final Path testFile = new Path("/testfile+1");
  FSDataOutputStream out = hdfs.create(testFile, true);
  out.writeBytes("0123456789");
  out.close();

  FSDataInputStream in = hftpFs.open(testFile);
  in.seek(7);
  assertEquals('7', in.read());
}
private int readFromHdfsBuffer() throws IOException {
  if (mBufferPosition < mBufferLimit) {
    // Mask to an unsigned value so a byte >= 0x80 is not mistaken for EOF (-1).
    return mBuffer[mBufferPosition++] & 0xFF;
  }
  LOG.error("Reading from HDFS directly");
  while ((mBufferLimit = mHdfsInputStream.read(mBuffer)) == 0) {
    LOG.error("Read 0 bytes in readFromHdfsBuffer for " + mHdfsPath);
  }
  if (mBufferLimit == -1) {
    return -1;
  }
  mBufferPosition = 0;
  return mBuffer[mBufferPosition++] & 0xFF;
}
@Override
public int read(long position, byte[] buffer, int offset, int length) throws IOException {
  int n;
  try {
    n = in.read(position, buffer, offset, length);
  } catch (FileNotFoundException e) {
    n = tryOpen().read(position, buffer, offset, length);
  } catch (NullPointerException e) {
    // HDFS 1.x - DFSInputStream.getBlockAt()
    n = tryOpen().read(position, buffer, offset, length);
  } catch (AssertionError e) {
    // assert in HDFS 1.x - DFSInputStream.getBlockAt()
    n = tryOpen().read(position, buffer, offset, length);
  }
  return n;
}
// read a file using blockSeekTo()
private boolean checkFile1(FSDataInputStream in, byte[] expected) {
  byte[] toRead = new byte[expected.length];
  int totalRead = 0;
  int nRead = 0;
  try {
    while ((nRead = in.read(toRead, totalRead, toRead.length - totalRead)) > 0) {
      totalRead += nRead;
    }
  } catch (IOException e) {
    return false;
  }
  assertEquals("Cannot read file.", toRead.length, totalRead);
  return checkFile(toRead, expected);
}
@Override
public int read() throws IOException {
  int res;
  try {
    res = in.read();
  } catch (FileNotFoundException e) {
    res = tryOpen().read();
  } catch (NullPointerException e) {
    // HDFS 1.x - DFSInputStream.getBlockAt()
    res = tryOpen().read();
  } catch (AssertionError e) {
    // assert in HDFS 1.x - DFSInputStream.getBlockAt()
    res = tryOpen().read();
  }
  // read() returns the byte value (0-255) or -1 at EOF, so any non-negative result
  // means one byte was consumed and the position must advance.
  if (res >= 0) {
    pos += 1;
  }
  return res;
}
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  // Read the next group of values.
  // Tip: avoid printing inside a method like this; it is time-consuming.
  if (pos >= end) {
    more = false;
    return false;
  }
  if (null == key) {
    key = -1;
  }
  if (null == value) {
    value = "";
  }

  byte[] temp = new byte[Float.SIZE / 8];
  long length = (end - start) > SPILL ? SPILL : (end - start);
  byte[] datas = new byte[(int) length];
  float[] data = new float[(int) length * 8 / Float.SIZE];
  int count = 0;

  int nnn = fileIn.read(datas); // number of bytes actually read
  for (int i = 0; i < datas.length * 8 / Float.SIZE; i++) {
    for (int j = 0; j < temp.length; j++) {
      temp[j] = datas[4 * i + j];
    }
    data[i] = Float.intBitsToFloat(getInt(temp));
    count++;
  }

  key = shotNum / 2;
  shotNum += data.length;

  String temp_value = "";
  for (int i = 0; i < count; i++) {
    temp_value += String.valueOf(data[i]) + ",";
  }
  temp_value = temp_value.substring(0, temp_value.lastIndexOf(","));
  value = temp_value;

  // Update the current read position.
  pos += data.length * temp.length;
  System.out.println("myinput");
  System.out.println("key:" + key + ",value:" + value);
  return true;
}
@Override
public int read(byte b[], int off, int len) throws IOException {
  int n;
  try {
    n = in.read(b, off, len);
  } catch (FileNotFoundException e) {
    n = tryOpen().read(b, off, len);
  } catch (NullPointerException e) {
    // HDFS 1.x - DFSInputStream.getBlockAt()
    n = tryOpen().read(b, off, len);
  } catch (AssertionError e) {
    // assert in HDFS 1.x - DFSInputStream.getBlockAt()
    n = tryOpen().read(b, off, len);
  }
  if (n > 0) {
    pos += n;
  }
  assert (in.getPos() == pos);
  return n;
}
/**
 * Saves the centroids between K-means iterations.
 *
 * @param counters - the counters containing the centroids
 */
public static void setCentroids(Counters counters) throws Exception {
  Configuration conf = setupConf();
  FileSystem fs = FileSystem.get(conf);

  Path path = new Path(BASE_OUTPUT);
  Path tempPath = new Path(BASE_OUTPUT + TEMP_FILE);
  FSDataOutputStream out = null;

  if (!fs.exists(path)) {
    fs.mkdirs(path);
  }

  path = new Path(BASE_OUTPUT + CENTROID_FILE);
  if (fs.exists(path)) {
    fs.rename(path, tempPath);
  }

  out = fs.create(path);
  out.writeUTF(Long.toString(counters.findCounter(Common.Centroids.ITERATION).getValue()));
  out.writeChar(Common.SEPARATOR.charAt(0));

  out.writeUTF(Common.Centroids.LOW.toString());
  out.writeChar(Common.SEPARATOR.charAt(0));
  out.writeUTF(Long.toString(counters.findCounter(Common.Centroids.LOW).getValue()));
  out.writeChar(Common.SEPARATOR.charAt(0));
  out.writeUTF(Long.toString(counters.findCounter(Common.Centroids.COUNT_LOW).getValue()));
  out.writeChar(Common.SEPARATOR.charAt(0));

  out.writeUTF(Common.Centroids.MEDIUM.toString());
  out.writeChar(Common.SEPARATOR.charAt(0));
  out.writeUTF(Long.toString(counters.findCounter(Common.Centroids.MEDIUM).getValue()));
  out.writeChar(Common.SEPARATOR.charAt(0));
  out.writeUTF(Long.toString(counters.findCounter(Common.Centroids.COUNT_MEDIUM).getValue()));
  out.writeChar(Common.SEPARATOR.charAt(0));

  out.writeUTF(Common.Centroids.HIGH.toString());
  out.writeChar(Common.SEPARATOR.charAt(0));
  out.writeUTF(Long.toString(counters.findCounter(Common.Centroids.HIGH).getValue()));
  out.writeChar(Common.SEPARATOR.charAt(0));
  out.writeUTF(Long.toString(counters.findCounter(Common.Centroids.COUNT_HIGH).getValue()));
  out.writeChar('\n');

  if (fs.exists(tempPath)) {
    FSDataInputStream in = fs.open(tempPath);
    int i = 0;
    while ((i = in.read()) != -1) {
      out.write(i);
    }
    in.close();
    fs.delete(tempPath, false);
  }

  out.close();
}
/**
 * Read the contents of a file.
 *
 * @param filePath the file path
 */
public static String readFile(String filePath) {
  StringBuffer sb = new StringBuffer();
  try {
    FSDataInputStream is = fs.open(new Path(filePath));
    int byteread = 0;
    byte[] tempbytes = new byte[BUFFER_SIZE];
    while ((byteread = is.read(tempbytes)) != -1) {
      sb.append(new String(tempbytes, 0, byteread));
    }
    if (is != null) {
      is.close();
    }
  } catch (Exception e1) {
    LOG.error("Failed to read file: " + e1.getMessage());
    e1.printStackTrace();
  }
  return sb.toString();
}
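// A hedged variant (not from the original source): buffering the raw bytes and decoding once
// avoids corrupting multi-byte characters that straddle a chunk boundary, which can happen when
// each chunk is converted with new String(...) separately as above. The fs field and BUFFER_SIZE
// constant are assumed to be the same as in the snippet above; UTF-8 is an assumed encoding.
public static String readFileOnce(String filePath) throws IOException {
  ByteArrayOutputStream bos = new ByteArrayOutputStream();
  FSDataInputStream is = fs.open(new Path(filePath));
  try {
    byte[] buf = new byte[BUFFER_SIZE];
    int n;
    while ((n = is.read(buf)) != -1) {
      bos.write(buf, 0, n);
    }
  } finally {
    is.close();
  }
  // Decode the whole file in one pass.
  return new String(bos.toByteArray(), StandardCharsets.UTF_8);
}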
/*
 * Fetch a file that is in a Hadoop file system. Return a local File.
 */
private File hdfsFetch(Path fromPath, Reporter reporter) throws IOException {
  File toFile = new File(tempDir, fromPath.toUri().getPath());
  File toDir = new File(toFile.getParent());
  if (toDir.exists()) {
    FileUtils.deleteDirectory(toDir);
  }
  toDir.mkdirs();
  Path toPath = new Path(toFile.getCanonicalPath());

  FileSystem fS = fromPath.getFileSystem(hadoopConf);
  FileSystem tofS = FileSystem.getLocal(hadoopConf);

  Throttler throttler = new Throttler((double) bytesPerSecThrottle);
  for (FileStatus fStatus : fS.globStatus(fromPath)) {
    log.info("Copying " + fStatus.getPath() + " to " + toPath);
    long bytesSoFar = 0;

    FSDataInputStream iS = fS.open(fStatus.getPath());
    FSDataOutputStream oS = tofS.create(toPath);

    byte[] buffer = new byte[downloadBufferSize];
    int nRead;
    while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
      bytesSoFar += nRead;
      oS.write(buffer, 0, nRead);
      throttler.incrementAndThrottle(nRead);
      if (bytesSoFar >= bytesToReportProgress) {
        reporter.progress(bytesSoFar);
        bytesSoFar = 0L;
      }
    }
    if (reporter != null) {
      reporter.progress(bytesSoFar);
    }
    oS.close();
    iS.close();
  }

  return toDir;
}
public int histInt(String file, String tag, Configuration conf) {
  int value = 1;
  try {
    FileSystem fs = FileSystem.get(conf);
    Path filepath = new Path(file);
    FSDataInputStream fsIn = fs.open(filepath);
    byte[] temp = new byte[250];
    fsIn.read(temp);
    // Close unconditionally so the stream is not leaked when the tag is absent.
    fsIn.close();

    String buf = getString(temp);
    if (buf.indexOf(tag + "=") != -1) {
      buf = buf.substring(buf.indexOf(tag + "=") + tag.length() + 1).trim();
      buf = buf.substring(0, buf.indexOf(",")).trim();
      value = Integer.parseInt(buf);
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
  return value;
}
//
// validates that file encounters BlockMissingException
//
private void validateFile(FileSystem fileSys, Path name) throws IOException {
  FSDataInputStream stm = fileSys.open(name);
  final byte[] b = new byte[4192];
  int num = 0;
  boolean gotException = false;

  try {
    while (num >= 0) {
      num = stm.read(b);
      if (num < 0) {
        break;
      }
    }
  } catch (BlockMissingException e) {
    gotException = true;
  }
  stm.close();
  assertTrue("Expected BlockMissingException ", gotException);
}
private void readData(
    String path, int length, int start, int SIZE, float[] datas, Configuration conf) {
  try {
    FileSystem fs = FileSystem.get(conf);
    FSDataInputStream fileIn = fs.open(new Path(path));
    fileIn.seek(length * start * SIZE);
    byte[] temp = new byte[4];
    for (int i = 0; i < datas.length; i++) {
      // readFully guarantees all 4 bytes of the float are read, unlike read(temp),
      // which may return fewer bytes.
      fileIn.readFully(temp);
      datas[i] = Float.intBitsToFloat(getInt(temp));
    }
    fileIn.close();
    fs = null;
  } catch (Exception e) {
    e.printStackTrace();
  }
}
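// A hedged alternative sketch (not from the original code): a single positional readFully() over
// the whole region is usually cheaper than a seek() followed by one 4-byte read per float, and it
// does not move the stream position. It mirrors the earlier FileSystem-based readData variant;
// the two-argument getInt(byte[], int) helper is the one used there, and the method and parameter
// names here are illustrative.
private void readDataBulk(String path, long regionStart, float[] datas, Configuration conf)
    throws IOException {
  FileSystem fs = FileSystem.get(conf);
  byte[] raw = new byte[datas.length * 4];
  FSDataInputStream fileIn = fs.open(new Path(path));
  try {
    // Positional read: fills raw completely or throws EOFException.
    fileIn.readFully(regionStart, raw, 0, raw.length);
  } finally {
    fileIn.close();
  }
  for (int i = 0; i < datas.length; i++) {
    datas[i] = Float.intBitsToFloat(getInt(raw, i * 4));
  }
}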
private void seekReadFile(FileSystem fileSys, Path name) throws IOException {
  FSDataInputStream stm = fileSys.open(name, 4096);
  byte[] expected = new byte[ONEMB];
  Random rand = new Random(seed);
  rand.nextBytes(expected);

  // First read 128 bytes to set count in BufferedInputStream
  byte[] actual = new byte[128];
  stm.read(actual, 0, actual.length);

  // Now read a byte array that is bigger than the internal buffer
  actual = new byte[100000];
  IOUtils.readFully(stm, actual, 0, actual.length);
  checkAndEraseData(actual, 128, expected, "First Read Test");

  // now do a small seek, within the range that is already read
  stm.seek(96036); // 4 byte seek
  actual = new byte[128];
  IOUtils.readFully(stm, actual, 0, actual.length);
  checkAndEraseData(actual, 96036, expected, "Seek Bug");

  // all done
  stm.close();
}