public static void readHiveResult(String path, OutputStreamWriter outStream, Configuration conf)
    throws IOException {
  FileSystem fs = FileSystem.get(conf);
  Path dir = new Path(path);
  if (!fs.exists(dir)) {
    throw new IOException("cannot find path: " + path);
  }
  FileStatus[] filelist = fs.listStatus(dir);
  long bytesRead = 0L;
  long maxsize = 1024L * 1024 * 1024 * 10; // stop after roughly 10 GB
  for (FileStatus f : filelist) {
    if (!f.isDir() && !f.getPath().getName().startsWith("_")) {
      FSDataInputStream in = fs.open(f.getPath());
      BufferedReader bf = new BufferedReader(new InputStreamReader(in));
      String line;
      while ((line = bf.readLine()) != null) {
        bytesRead += line.getBytes().length;
        outStream.write(line.replaceAll("\001", ",").replaceAll("\t", ","));
        outStream.write("\r\n");
        if (bytesRead >= maxsize) {
          bf.close();
          in.close();
          return;
        }
      }
      bf.close();
      in.close();
    }
  }
}
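/*
 * A hedged usage sketch for readHiveResult above. The warehouse directory, the local CSV
 * target, and the surrounding example method are hypothetical, chosen only to illustrate
 * the expected call pattern; they are not part of the original code.
 */
public static void exportHiveResultExample() throws IOException {
  Configuration conf = new Configuration();
  OutputStreamWriter out =
      new OutputStreamWriter(new FileOutputStream("/tmp/hive-result.csv"), "UTF-8");
  try {
    readHiveResult("/user/hive/warehouse/example_table", out, conf);
  } finally {
    out.close();
  }
}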
/*
 * Fetch a file that is in a Hadoop file system. Return a local File.
 * Interruptible.
 */
private File hdfsFetch(Path fromPath, Reporter reporter) throws IOException, InterruptedException {
  UUID uniqueId = UUID.randomUUID();
  File toFile = new File(tempDir, uniqueId.toString() + "/" + fromPath.getName());
  File toDir = new File(toFile.getParent());
  if (toDir.exists()) {
    FileUtils.deleteDirectory(toDir);
  }
  toDir.mkdirs();
  Path toPath = new Path(toFile.getCanonicalPath());
  FileSystem fS = fromPath.getFileSystem(hadoopConf);
  FileSystem tofS = FileSystem.getLocal(hadoopConf);
  Throttler throttler = new Throttler((double) bytesPerSecThrottle);
  try {
    for (FileStatus fStatus : fS.globStatus(fromPath)) {
      log.info("Copying " + fStatus.getPath() + " to " + toPath);
      long bytesSoFar = 0;
      FSDataInputStream iS = fS.open(fStatus.getPath());
      FSDataOutputStream oS = tofS.create(toPath);
      byte[] buffer = new byte[downloadBufferSize];
      int nRead;
      while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
        // Needed to be able to be interrupted at any moment.
        if (Thread.interrupted()) {
          iS.close();
          oS.close();
          cleanDirNoExceptions(toDir);
          throw new InterruptedException();
        }
        bytesSoFar += nRead;
        oS.write(buffer, 0, nRead);
        throttler.incrementAndThrottle(nRead);
        if (bytesSoFar >= bytesToReportProgress) {
          if (reporter != null) {
            reporter.progress(bytesSoFar);
          }
          bytesSoFar = 0L;
        }
      }
      if (reporter != null) {
        reporter.progress(bytesSoFar);
      }
      oS.close();
      iS.close();
    }
    return toDir;
  } catch (ClosedByInterruptException e) {
    // This can be thrown by the read method.
    cleanDirNoExceptions(toDir);
    throw new InterruptedIOException();
  }
}
/** This tests {@link StringWriter} with non-rolling output. */
@Test
public void testNonRollingStringWriter() throws Exception {
  final int NUM_ELEMENTS = 20;
  final int PARALLELISM = 2;
  final String outPath = hdfsURI + "/string-non-rolling-out";

  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(PARALLELISM);

  DataStream<Tuple2<Integer, String>> source =
      env.addSource(new TestSourceFunction(NUM_ELEMENTS)).broadcast().filter(new OddEvenFilter());

  RollingSink<String> sink =
      new RollingSink<String>(outPath)
          .setBucketer(new NonRollingBucketer())
          .setPartPrefix("part")
          .setPendingPrefix("")
          .setPendingSuffix("");

  source
      .map(
          new MapFunction<Tuple2<Integer, String>, String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public String map(Tuple2<Integer, String> value) throws Exception {
              return value.f1;
            }
          })
      .addSink(sink);

  env.execute("RollingSink String Write Test");

  FSDataInputStream inStream = dfs.open(new Path(outPath + "/part-0-0"));
  BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
  for (int i = 0; i < NUM_ELEMENTS; i += 2) {
    String line = br.readLine();
    Assert.assertEquals("message #" + i, line);
  }
  inStream.close();

  inStream = dfs.open(new Path(outPath + "/part-1-0"));
  br = new BufferedReader(new InputStreamReader(inStream));
  for (int i = 1; i < NUM_ELEMENTS; i += 2) {
    String line = br.readLine();
    Assert.assertEquals("message #" + i, line);
  }
  inStream.close();
}
/**
 * Open the file to read from begin to end. Then close the file. Return number of bytes read.
 * Support both sequential read and position read.
 */
private long readData(String fname, byte[] buffer, long byteExpected, long beginPosition)
    throws IOException {
  long totalByteRead = 0;
  Path path = getFullyQualifiedPath(fname);
  FSDataInputStream in = null;
  try {
    in = openInputStream(path);
    long visibleLenFromReadStream = ((HdfsDataInputStream) in).getVisibleLength();
    if (visibleLenFromReadStream < byteExpected) {
      throw new IOException(
          visibleLenFromReadStream + " = visibleLenFromReadStream < byteExpected = " + byteExpected);
    }
    totalByteRead =
        readUntilEnd(
            in,
            buffer,
            buffer.length,
            fname,
            beginPosition,
            visibleLenFromReadStream,
            positionReadOption);
    // Reading more data than the visible length is OK, but not less.
    if (totalByteRead + beginPosition < byteExpected) {
      throw new IOException(
          "readData mismatch in byte read: expected="
              + byteExpected
              + " ; got "
              + (totalByteRead + beginPosition));
    }
    return totalByteRead + beginPosition;
  } catch (IOException e) {
    throw new IOException(
        "##### Caught Exception in readData. "
            + "Total Byte Read so far = "
            + totalByteRead
            + " beginPosition = "
            + beginPosition,
        e);
  } finally {
    if (in != null) {
      in.close();
    }
  }
}
/**
 * Reads a file line by line between the given line numbers (1-based); -1 means no bound.
 *
 * @param filePath file path
 * @param beginIndex first line number to include
 * @param endIndex last line number to include, or -1 for no upper bound
 * @return the distinct lines in the requested range
 */
public static List<String> readFileByLinesNoDup(String filePath, int beginIndex, int endIndex) {
  Set<String> set = new HashSet<String>();
  BufferedReader br = null;
  FSDataInputStream is = null;
  try {
    LOG.info("Reading file contents one full line at a time:");
    is = fs.open(new Path(filePath));
    br = new BufferedReader(new InputStreamReader(is), BUFFER_SIZE);
    String tempString = null;
    int lineindex = 0;
    if (endIndex == -1) {
      while ((tempString = br.readLine()) != null) {
        lineindex++;
        if (lineindex >= beginIndex) set.add(tempString);
      }
    } else {
      while ((tempString = br.readLine()) != null) {
        lineindex++;
        if ((lineindex >= beginIndex) && (lineindex <= endIndex)) set.add(tempString);
      }
    }
    if (is != null) {
      is.close();
    }
    if (br != null) {
      br.close();
    }
  } catch (IOException e) {
    LOG.error("Failed to read file: " + e.getMessage());
    e.printStackTrace();
  }
  List<String> list = new ArrayList<String>(set.size());
  list.addAll(set);
  return list;
}
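/*
 * A hedged usage sketch for readFileByLinesNoDup above: collect every distinct line of a
 * file. The HDFS path is a hypothetical value used only for illustration.
 */
public static void printDistinctLinesExample() {
  List<String> lines = readFileByLinesNoDup("/user/example/input.txt", 1, -1);
  for (String line : lines) {
    System.out.println(line);
  }
}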
@Override
public void close() throws IOException {
  // Close this input stream.
  if (null != fileIn) {
    fileIn.close();
  }
}
public PrefixEncodedGlobalStatsWithIndex(Path prefixSetPath, FileSystem fs) throws IOException {
  fileSys = fs;
  FSDataInputStream termsInput = fileSys.open(prefixSetPath);
  prefixSet.readFields(termsInput);
  termsInput.close();
}
static String getStormHomeInZip(FileSystem fs, Path zip, String stormVersion)
    throws IOException, RuntimeException {
  FSDataInputStream fsInputStream = fs.open(zip);
  ZipInputStream zipInputStream = new ZipInputStream(fsInputStream);
  ZipEntry entry = zipInputStream.getNextEntry();
  while (entry != null) {
    String entryName = entry.getName();
    if (entryName.matches("^storm(-" + stormVersion + ")?/")) {
      fsInputStream.close();
      return entryName.replace("/", "");
    }
    entry = zipInputStream.getNextEntry();
  }
  fsInputStream.close();
  throw new RuntimeException("Cannot find storm home entry in storm zip file.");
}
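/*
 * A hedged usage sketch for getStormHomeInZip above. The zip location and the version
 * string are assumptions made for illustration only.
 */
static String resolveStormHomeExample(Configuration conf) throws IOException {
  FileSystem fs = FileSystem.get(conf);
  return getStormHomeInZip(fs, new Path("/apps/storm/storm.zip"), "1.2.3");
}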
/*
 * Read some data, skip a few bytes and read more. HADOOP-922.
 */
private void smallReadSeek(FileSystem fileSys, Path name) throws IOException {
  if (fileSys instanceof ChecksumFileSystem) {
    fileSys = ((ChecksumFileSystem) fileSys).getRawFileSystem();
  }
  // Make the buffer size small to trigger code for HADOOP-922
  FSDataInputStream stmRaw = fileSys.open(name, 1);
  byte[] expected = new byte[ONEMB];
  Random rand = new Random(seed);
  rand.nextBytes(expected);

  // Issue a simple read first.
  byte[] actual = new byte[128];
  stmRaw.seek(100000);
  stmRaw.read(actual, 0, actual.length);
  checkAndEraseData(actual, 100000, expected, "First Small Read Test");

  // now do a small seek of 4 bytes, within the same block.
  int newpos1 = 100000 + 128 + 4;
  stmRaw.seek(newpos1);
  stmRaw.read(actual, 0, actual.length);
  checkAndEraseData(actual, newpos1, expected, "Small Seek Bug 1");

  // seek another 256 bytes this time
  int newpos2 = newpos1 + 256;
  stmRaw.seek(newpos2);
  stmRaw.read(actual, 0, actual.length);
  checkAndEraseData(actual, newpos2, expected, "Small Seek Bug 2");

  // all done
  stmRaw.close();
}
private void readData(String path, int length, int start, int SIZE, float[] datas, FileSystem fs) {
  try {
    FSDataInputStream fileIn = fs.open(new Path(path));
    byte[] datafully = new byte[datas.length * 4];
    // Positioned read of the whole float block; readFully avoids short reads.
    fileIn.readFully(length * start * SIZE, datafully, 0, datafully.length);
    fileIn.close();
    // Decode each 4-byte big-endian chunk into a float.
    for (int i = 0; i < datas.length; i++) {
      datas[i] = Float.intBitsToFloat(getInt(datafully, i * 4));
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
private void closeCurrentFile() throws IOException {
  closeReader();
  if (inStream != null) {
    inStream.close();
    inStream = null;
  }
}
public void readFile(String file) throws IOException {
  Configuration conf = new Configuration();
  conf.addResource(new Path("/opt/hadoop-0.20.0/conf/core-site.xml"));

  FileSystem fileSystem = FileSystem.get(conf);

  Path path = new Path(file);
  if (!fileSystem.exists(path)) {
    System.out.println("File " + file + " does not exist");
    return;
  }

  FSDataInputStream in = fileSystem.open(path);

  String filename = file.substring(file.lastIndexOf('/') + 1, file.length());
  OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(filename)));

  byte[] b = new byte[1024];
  int numBytes = 0;
  while ((numBytes = in.read(b)) > 0) {
    out.write(b, 0, numBytes);
  }

  in.close();
  out.close();
  fileSystem.close();
}
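/*
 * A hedged usage sketch for readFile above. The enclosing class name (HdfsFileCopier) and
 * the HDFS path are hypothetical; the call copies the file into the local working directory
 * under its own name.
 */
public static void copyToLocalExample() throws IOException {
  new HdfsFileCopier().readFile("/user/example/reports/2015-01.csv");
}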
@Test
public void testHDFS() {
  Path file = new Path(hdfsURI + hdPath);
  org.apache.hadoop.fs.Path result = new org.apache.hadoop.fs.Path(hdfsURI + "/result");
  try {
    FileSystem fs = file.getFileSystem();
    Assert.assertTrue("Must be HadoopFileSystem", fs instanceof HadoopFileSystem);

    DopOneTestEnvironment.setAsContext();
    try {
      WordCount.main(new String[] {file.toString(), result.toString()});
    } catch (Throwable t) {
      t.printStackTrace();
      Assert.fail("Test failed with " + t.getMessage());
    } finally {
      DopOneTestEnvironment.unsetAsContext();
    }

    Assert.assertTrue("No result file present", hdfs.exists(result));

    // validate output:
    org.apache.hadoop.fs.FSDataInputStream inStream = hdfs.open(result);
    StringWriter writer = new StringWriter();
    IOUtils.copy(inStream, writer);
    String resultString = writer.toString();

    Assert.assertEquals("hdfs 10\n" + "hello 10\n", resultString);
    inStream.close();
  } catch (IOException e) {
    e.printStackTrace();
    Assert.fail("Error in test: " + e.getMessage());
  }
}
private static boolean checkFiles(FileSystem fs, String topdir, MyFile[] files, boolean existingOnly)
    throws IOException {
  Path root = new Path(topdir);

  for (int idx = 0; idx < files.length; idx++) {
    Path fPath = new Path(root, files[idx].getName());
    try {
      fs.getFileStatus(fPath);
      FSDataInputStream in = fs.open(fPath);
      byte[] toRead = new byte[files[idx].getSize()];
      byte[] toCompare = new byte[files[idx].getSize()];
      Random rb = new Random(files[idx].getSeed());
      rb.nextBytes(toCompare);
      assertEquals("Cannot read file.", toRead.length, in.read(toRead));
      in.close();
      for (int i = 0; i < toRead.length; i++) {
        if (toRead[i] != toCompare[i]) {
          return false;
        }
      }
      toRead = null;
      toCompare = null;
    } catch (FileNotFoundException fnfe) {
      if (!existingOnly) {
        throw fnfe;
      }
    }
  }

  return true;
}
/**
 * Copies a file from DFS to the local working directory.
 *
 * @param dfsPath is the pathname to a file in DFS
 * @return the path of the new file in local scratch space
 * @throws IOException if it can't access the files
 */
private String copyDBFile(String dfsPath) throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path filenamePath = new Path(dfsPath);
  File localFile = new File(tmpDirFile, filenamePath.getName());

  if (!fs.exists(filenamePath)) {
    throw new IOException("file not found: " + dfsPath);
  }

  FSDataInputStream in = fs.open(filenamePath);
  BufferedReader d = new BufferedReader(new InputStreamReader(in));
  BufferedWriter out = new BufferedWriter(new FileWriter(localFile.getPath()));

  String line;
  while ((line = d.readLine()) != null) {
    out.write(line + "\n");
  }

  in.close();
  out.close();
  return localFile.getPath();
}
public static void createCentersSequenceFile(
    Configuration conf, FileSystem fs, String centroidsPath, String sequenceFilePath)
    throws Exception {
  Path seqFile = new Path(sequenceFilePath);
  if (fs.exists(seqFile)) {
    fs.delete(seqFile, true);
  }
  FSDataInputStream inputStream = fs.open(new Path(centroidsPath));
  // Read the plain-text centroid file line by line; BufferedReader replaces the
  // deprecated DataInputStream.readLine()/available() pattern.
  BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, seqFile, Centroid.class, IntWritable.class);
  IntWritable value = new IntWritable(0);
  String line;
  while ((line = reader.readLine()) != null) {
    StringTokenizer tokenizer = new StringTokenizer(line, " ");
    // Each line is: <clusterId> <coord_1> ... <coord_dim>
    int dim = tokenizer.countTokens() - 1;
    int clusterId = Integer.valueOf(tokenizer.nextToken());
    double[] coords = new double[dim];
    for (int i = 0; i < dim; i++) {
      coords[i] = Double.valueOf(tokenizer.nextToken());
    }
    Centroid cluster = new Centroid(clusterId, new Point(coords));
    writer.append(cluster, value);
  }
  IOUtils.closeStream(writer);
  reader.close();
}
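/*
 * A hedged usage sketch for createCentersSequenceFile above. Both paths are hypothetical
 * and only show the expected call pattern: a plain-text centroid file in, a SequenceFile
 * of Centroid/IntWritable pairs out.
 */
public static void buildCentersExample() throws Exception {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  createCentersSequenceFile(conf, fs, "/kmeans/centroids.txt", "/kmeans/centers.seq");
}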
@Override
protected void closeInternal() throws IOException {
  LOG.debug("Closing normal index input on {}", path);
  if (!clone) {
    inputStream.close();
  }
}
private String getJobSummary(FileContext fc, Path path) throws IOException {
  Path qPath = fc.makeQualified(path);
  FSDataInputStream in = fc.open(qPath);
  String jobSummaryString = in.readUTF();
  in.close();
  return jobSummaryString;
}
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("arguments: input-dir output-file");
    System.exit(1);
  }

  FileSystem fs = FileSystem.get(confHadoop);
  Path inPath = new Path(args[0]);
  Path outPath = new Path(args[1] + "/dataset");
  FSDataInputStream in = null;
  SequenceFile.Writer writer = null;
  List<Path> files = listFiles(inPath, jpegFilter);
  try {
    writer = SequenceFile.createWriter(fs, confHadoop, outPath, Text.class, BytesWritable.class);
    for (Path p : files) {
      in = fs.open(p);
      // Size the buffer from the file length rather than available(),
      // which is not guaranteed to cover the whole file.
      byte[] buffer = new byte[(int) fs.getFileStatus(p).getLen()];
      in.readFully(buffer);
      writer.append(new Text(p.getName()), new BytesWritable(buffer));
      in.close();
    }
  } finally {
    IOUtils.closeStream(writer);
  }
}
@Test(timeout = 120000)
public void testSeekAfterSetDropBehind() throws Exception {
  // start a cluster
  LOG.info("testSeekAfterSetDropBehind");
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  String TEST_PATH = "/test";
  int TEST_PATH_LEN = MAX_TEST_FILE_LEN;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    createHdfsFile(fs, new Path(TEST_PATH), TEST_PATH_LEN, false);
    // verify that we can seek after setDropBehind
    FSDataInputStream fis = fs.open(new Path(TEST_PATH));
    try {
      Assert.assertTrue(fis.read() != -1); // create BlockReader
      fis.setDropBehind(false); // clear BlockReader
      fis.seek(2); // seek
    } finally {
      fis.close();
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
/** Tests getPos() functionality. */
@Test
public void testGetPos() throws IOException {
  final Path testFile = new Path("/testfile+1");
  // Write a test file.
  FSDataOutputStream out = hdfs.create(testFile, true);
  out.writeBytes("0123456789");
  out.close();

  FSDataInputStream in = hftpFs.open(testFile);

  // Test read().
  for (int i = 0; i < 5; ++i) {
    assertEquals(i, in.getPos());
    in.read();
  }

  // Test read(b, off, len).
  assertEquals(5, in.getPos());
  byte[] buffer = new byte[10];
  assertEquals(2, in.read(buffer, 0, 2));
  assertEquals(7, in.getPos());

  // Test read(b).
  int bytesRead = in.read(buffer);
  assertEquals(7 + bytesRead, in.getPos());

  // Test EOF.
  for (int i = 0; i < 100; ++i) {
    in.read();
  }
  assertEquals(10, in.getPos());
  in.close();
}
public FSDataInputStream open(Path file, int bufferSize) throws IOException {
  FTPClient client = connect();
  Path workDir = new Path(client.printWorkingDirectory());
  Path absolute = makeAbsolute(workDir, file);
  FileStatus fileStat = getFileStatus(client, absolute);
  if (fileStat.isDirectory()) {
    disconnect(client);
    throw new IOException("Path " + file + " is a directory.");
  }
  client.allocate(bufferSize);
  Path parent = absolute.getParent();
  // Change to the parent directory on the server. Only then can we read the
  // file on the server by opening up an InputStream. As a side effect the
  // working directory on the server is changed to the parent directory of the
  // file. The FTP client connection is closed when close() is called on the
  // FSDataInputStream.
  client.changeWorkingDirectory(parent.toUri().getPath());
  InputStream is = client.retrieveFileStream(file.getName());
  FSDataInputStream fis = new FSDataInputStream(new FTPInputStream(is, client, statistics));
  if (!FTPReply.isPositivePreliminary(client.getReplyCode())) {
    // The ftpClient is in an inconsistent state. Must close the stream
    // which in turn will logout and disconnect from the FTP server.
    fis.close();
    throw new IOException("Unable to open file: " + file + ", Aborting");
  }
  return fis;
}
static long readHdfsFile(FileSystem fs, Path p, long length, Boolean dropBehind) throws Exception {
  FSDataInputStream fis = null;
  long totalRead = 0;
  try {
    fis = fs.open(p);
    if (dropBehind != null) {
      fis.setDropBehind(dropBehind);
    }
    byte[] buf = new byte[8196];
    while (length > 0) {
      int amt = (length > buf.length) ? buf.length : (int) length;
      int ret = fis.read(buf, 0, amt);
      if (ret == -1) {
        return totalRead;
      }
      totalRead += ret;
      length -= ret;
    }
  } catch (IOException e) {
    LOG.error("ioexception", e);
  } finally {
    if (fis != null) {
      fis.close();
    }
  }
  // Only the EOF branch above returns; callers are expected to pass a length
  // no smaller than the file (e.g. Long.MAX_VALUE), otherwise this fires.
  throw new RuntimeException("unreachable");
}
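/*
 * A hedged usage sketch for readHdfsFile above. Passing Long.MAX_VALUE reads to end-of-file,
 * which is the only path that returns normally; the file path is a hypothetical value.
 */
static long readWholeFileExample(FileSystem fs) throws Exception {
  return readHdfsFile(fs, new Path("/benchmarks/input.dat"), Long.MAX_VALUE, Boolean.TRUE);
}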
public static void downloadHdfs(String srcfilePath, String destFilePath) {
  try {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(srcfilePath), conf);
    FSDataInputStream hdfsInStream = fs.open(new Path(srcfilePath));
    File dstFile = new File(destFilePath);
    if (!dstFile.getParentFile().exists()) {
      dstFile.getParentFile().mkdirs();
    }
    OutputStream out = new FileOutputStream(destFilePath);
    byte[] ioBuffer = new byte[1024];
    int readLen = hdfsInStream.read(ioBuffer);
    while (-1 != readLen) {
      out.write(ioBuffer, 0, readLen);
      readLen = hdfsInStream.read(ioBuffer);
    }
    out.close();
    hdfsInStream.close();
    fs.close();
  } catch (FileNotFoundException e) {
    LOG.error("[downloadHdfs]", e);
  } catch (IOException e) {
    LOG.error("[downloadHdfs]", e);
  }
}
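/*
 * A hedged usage sketch for downloadHdfs above. Both the HDFS URI and the local target
 * path are hypothetical values used for illustration.
 */
public static void downloadExample() {
  downloadHdfs("hdfs://namenode:8020/user/example/data.bin", "/tmp/data.bin");
}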
@Override
public ModelInput<StringBuilder> createInput(
    Class<? extends StringBuilder> dataType,
    FileSystem fileSystem,
    Path path,
    long offset,
    long fragmentSize,
    Counter counter)
    throws IOException, InterruptedException {
  FileSystem fs = FileSystem.get(path.toUri(), getConf());
  FSDataInputStream in = fs.open(path);
  boolean succeed = false;
  try {
    in.seek(offset);
    ModelInput<StringBuilder> result =
        format.createInput(
            dataType, path.toString(), new CountInputStream(in, counter), offset, fragmentSize);
    succeed = true;
    return result;
  } finally {
    if (!succeed) {
      in.close();
    }
  }
}
/**
 * Retrieves the centroids between K-means iterations.
 *
 * @return the centroids
 */
public static long[] getCentroids() throws IOException {
  Configuration conf = setupConf();
  FileSystem fs = FileSystem.get(conf);
  Path path = new Path(BASE_OUTPUT + CENTROID_FILE);
  long[] centroids = new long[4];

  FSDataInputStream in = fs.open(path);
  centroids[0] = Long.parseLong(in.readUTF());
  // Skip the tokens written between centroid values.
  in.readChar();
  in.readUTF();
  in.readChar();
  centroids[1] = Long.parseLong(in.readUTF());
  in.readChar();
  in.readUTF();
  in.readChar();
  in.readUTF();
  in.readChar();
  centroids[2] = Long.parseLong(in.readUTF());
  in.readChar();
  in.readUTF();
  in.readChar();
  in.readUTF();
  in.readChar();
  centroids[3] = Long.parseLong(in.readUTF());
  in.close();

  return centroids;
}
private byte[] readFile(Path inputPath, long len) throws Exception {
  FSDataInputStream fsIn = fs.open(inputPath);
  // state data will not be that "long"
  byte[] data = new byte[(int) len];
  fsIn.readFully(data);
  fsIn.close();
  return data;
}
public ColumnDefinitionFile(Path path) throws IOException {
  if (path == null) {
    throw new IOException("A column file path must be specified.");
  }
  FSDataInputStream fdis = path.getFileSystem(HadoopUtils.createConfiguration()).open(path);
  load(fdis);
  fdis.close();
}
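/*
 * A hedged usage sketch for the ColumnDefinitionFile constructor above. The definition
 * file path is an assumption made for illustration only.
 */
public static ColumnDefinitionFile loadColumnsExample() throws IOException {
  return new ColumnDefinitionFile(new Path("/config/columns.def"));
}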
/**
 * Retrieves the graph root.
 *
 * @return the graph root
 * @throws IOException
 */
public static String getGraphRoot() throws IOException {
  Configuration conf = setupConf();
  FileSystem fs = FileSystem.get(conf);
  FSDataInputStream in = fs.open(new Path(SPATH_OUTPUT + SPATH_ROOTFILE));
  String root = in.readUTF();
  in.close();
  return root;
}
/**
 * Gets the list of all completed snapshots.
 *
 * @param snapshotDir snapshot directory
 * @return list of SnapshotDescriptions
 * @throws IOException File system exception
 */
private List<SnapshotDescription> getCompletedSnapshots(Path snapshotDir) throws IOException {
  List<SnapshotDescription> snapshotDescs = new ArrayList<SnapshotDescription>();
  // first create the snapshot root path and check to see if it exists
  FileSystem fs = master.getMasterFileSystem().getFileSystem();
  if (snapshotDir == null) snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir);

  // if there are no snapshots, return an empty list
  if (!fs.exists(snapshotDir)) {
    return snapshotDescs;
  }

  // ignore all the snapshots in progress
  FileStatus[] snapshots =
      fs.listStatus(snapshotDir, new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
  MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
  // loop through all the completed snapshots
  for (FileStatus snapshot : snapshots) {
    Path info = new Path(snapshot.getPath(), SnapshotDescriptionUtils.SNAPSHOTINFO_FILE);
    // if the snapshot is bad
    if (!fs.exists(info)) {
      LOG.error("Snapshot information for " + snapshot.getPath() + " doesn't exist");
      continue;
    }
    FSDataInputStream in = null;
    try {
      in = fs.open(info);
      SnapshotDescription desc = SnapshotDescription.parseFrom(in);
      if (cpHost != null) {
        try {
          cpHost.preListSnapshot(desc);
        } catch (AccessDeniedException e) {
          LOG.warn(
              "Current user does not have access to "
                  + desc.getName()
                  + " snapshot. "
                  + "Either you should be owner of this snapshot or admin user.");
          // Skip this and try for next snapshot
          continue;
        }
      }
      snapshotDescs.add(desc);
      // call coproc post hook
      if (cpHost != null) {
        cpHost.postListSnapshot(desc);
      }
    } catch (IOException e) {
      LOG.warn("Found a corrupted snapshot " + snapshot.getPath(), e);
    } finally {
      if (in != null) {
        in.close();
      }
    }
  }
  return snapshotDescs;
}