private void init(Counters.Counter readsCounter) throws IOException {
  if (reader == null) {
    FSDataInputStream in = fs.open(file);
    in.seek(segmentOffset);
    reader = new Reader<K, V>(conf, in, segmentLength, codec, readsCounter);
  }
}
private static boolean checkFiles(
    FileSystem fs, String topdir, MyFile[] files, boolean existingOnly) throws IOException {
  Path root = new Path(topdir);

  for (int idx = 0; idx < files.length; idx++) {
    Path fPath = new Path(root, files[idx].getName());
    try {
      fs.getFileStatus(fPath);
      FSDataInputStream in = fs.open(fPath);
      byte[] toRead = new byte[files[idx].getSize()];
      byte[] toCompare = new byte[files[idx].getSize()];
      Random rb = new Random(files[idx].getSeed());
      rb.nextBytes(toCompare);
      assertEquals("Cannot read file.", toRead.length, in.read(toRead));
      in.close();
      for (int i = 0; i < toRead.length; i++) {
        if (toRead[i] != toCompare[i]) {
          return false;
        }
      }
      toRead = null;
      toCompare = null;
    } catch (FileNotFoundException fnfe) {
      if (!existingOnly) {
        throw fnfe;
      }
    }
  }

  return true;
}
@Test
public void testHDFS() {
  Path file = new Path(hdfsURI + hdPath);
  org.apache.hadoop.fs.Path result = new org.apache.hadoop.fs.Path(hdfsURI + "/result");
  try {
    FileSystem fs = file.getFileSystem();
    Assert.assertTrue("Must be HadoopFileSystem", fs instanceof HadoopFileSystem);

    DopOneTestEnvironment.setAsContext();
    try {
      WordCount.main(new String[] {file.toString(), result.toString()});
    } catch (Throwable t) {
      t.printStackTrace();
      Assert.fail("Test failed with " + t.getMessage());
    } finally {
      DopOneTestEnvironment.unsetAsContext();
    }

    Assert.assertTrue("No result file present", hdfs.exists(result));

    // validate output:
    org.apache.hadoop.fs.FSDataInputStream inStream = hdfs.open(result);
    StringWriter writer = new StringWriter();
    IOUtils.copy(inStream, writer);
    String resultString = writer.toString();

    Assert.assertEquals("hdfs 10\nhello 10\n", resultString);
    inStream.close();
  } catch (IOException e) {
    e.printStackTrace();
    Assert.fail("Error in test: " + e.getMessage());
  }
}
/**
 * Copies a file from DFS to the local working directory.
 *
 * @param dfsPath the path to a file in DFS
 * @return the path of the new file in local scratch space
 * @throws IOException if it can't access the files
 */
private String copyDBFile(String dfsPath) throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path filenamePath = new Path(dfsPath);
  File localFile = new File(tmpDirFile, filenamePath.getName());

  if (!fs.exists(filenamePath)) {
    throw new IOException("file not found: " + dfsPath);
  }

  FSDataInputStream in = fs.open(filenamePath);
  BufferedReader d = new BufferedReader(new InputStreamReader(in));
  BufferedWriter out = new BufferedWriter(new FileWriter(localFile.getPath()));
  String line;
  line = d.readLine();
  while (line != null) {
    out.write(line + "\n");
    line = d.readLine();
  }
  in.close();
  out.close();
  return localFile.getPath();
}
public static NaiveBayesModel materialize(Path output, Configuration conf) throws IOException {
  FileSystem fs = output.getFileSystem(conf);

  Vector weightsPerLabel = null;
  Vector perLabelThetaNormalizer = null;
  Vector weightsPerFeature = null;
  Matrix weightsPerLabelAndFeature;
  float alphaI;

  FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin"));
  try {
    alphaI = in.readFloat();
    weightsPerFeature = VectorWritable.readVector(in);
    weightsPerLabel = VectorWritable.readVector(in);
    perLabelThetaNormalizer = VectorWritable.readVector(in);

    weightsPerLabelAndFeature = new SparseMatrix(weightsPerLabel.size(), weightsPerFeature.size());
    for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) {
      weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in));
    }
  } finally {
    Closeables.closeQuietly(in);
  }

  NaiveBayesModel model = new NaiveBayesModel(
      weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel, perLabelThetaNormalizer, alphaI);
  model.validate();
  return model;
}
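// A minimal usage sketch for materialize() above; the model path is hypothetical
// and assumes a model was previously trained and written to that directory.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class MaterializeDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // load the serialized model back into memory
    NaiveBayesModel model = NaiveBayesModel.materialize(new Path("/tmp/naive-bayes-model"), conf);
    System.out.println("model loaded: " + model);
  }
}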
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  m_Sb.setLength(0);
  m_Start = split.getStart();
  m_End = m_Start + split.getLength();
  final Path file = split.getPath();
  compressionCodecs = new CompressionCodecFactory(job);
  final CompressionCodec codec = compressionCodecs.getCodec(file);

  // open the file and seek to the start of the split
  FileSystem fs = file.getFileSystem(job);
  // getFileStatus().getLen() replaces the deprecated fs.getLength(file)
  long length = fs.getFileStatus(file).getLen();
  FSDataInputStream fileIn = fs.open(split.getPath());
  if (m_Start > 0) {
    fileIn.seek(m_Start);
  }
  if (codec != null) {
    // a compressed file cannot be split, so read it to the end
    CompressionInputStream inputStream = codec.createInputStream(fileIn);
    m_Input = new BufferedReader(new InputStreamReader(inputStream));
    m_End = length;
  } else {
    m_Input = new BufferedReader(new InputStreamReader(fileIn));
  }
  m_Current = m_Start;
  m_Key = split.getPath().getName();
}
public static void main(String[] args) throws IOException {
  String uri = args[0];
  Configuration configuration = new Configuration();
  System.out.println("Trying to get the file system object");
  URI uriObj = URI.create(uri);
  System.out.println("Got URI object " + uri);
  FileSystem fs = FileSystem.get(uriObj, configuration);
  FSDataInputStream fsDataInputStream = null;
  Path hdfsPath = new Path(uri);
  fsDataInputStream = fs.open(hdfsPath);
  // This specifies that reading starts from the 0th byte.
  fsDataInputStream.seek(0);
  IOUtils.copyBytes(fsDataInputStream, System.out, 4096, false);
  System.out.println("*******************************************");

  BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(hdfsPath)));
  try {
    String line;
    line = br.readLine();
    while (line != null) {
      System.out.println("################ Line is###### " + line);
      // be sure to read the next line, otherwise you'll get an infinite loop
      line = br.readLine();
    }
  } finally {
    // close the BufferedReader
    br.close();
  }
}
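// A hedged sketch of the positioned-read API that FSDataInputStream also offers
// (via the PositionedReadable interface): it reads at an absolute offset without
// disturbing the stream's seek pointer. The path argument and the 16-byte header
// are assumptions for illustration.
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PositionedReadDemo {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    try (FSDataInputStream in = fs.open(new Path(args[0]))) {
      byte[] header = new byte[16];
      in.readFully(0L, header); // absolute offset; the main file pointer is unchanged
      System.out.println(new String(header, StandardCharsets.UTF_8));
    }
  }
}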
public PrefixEncodedGlobalStatsWithIndex(Path prefixSetPath, FileSystem fs) throws IOException {
  fileSys = fs;
  FSDataInputStream termsInput = fileSys.open(prefixSetPath);
  prefixSet.readFields(termsInput);
  termsInput.close();
}
private String getJobSummary(FileContext fc, Path path) throws IOException {
  Path qPath = fc.makeQualified(path);
  FSDataInputStream in = fc.open(qPath);
  String jobSummaryString = in.readUTF();
  in.close();
  return jobSummaryString;
}
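// Companion sketch (assumed writer side): readUTF() above only decodes data that
// was produced by DataOutput.writeUTF(), which prefixes the string with a 2-byte
// length and caps it at 65535 encoded bytes. The `fs` handle is an assumption.
try (FSDataOutputStream out = fs.create(qPath)) {
  out.writeUTF(jobSummaryString); // modified UTF-8 with a 2-byte length prefix
}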
@Override
public boolean next(LongWritable key, ProtobufWritable<M> value) throws IOException {
  if (pos_ > end_ || fileIn_.available() <= 0) {
    return false;
  }

  key.set(pos_);
  if (Protobufs.KNOWN_GOOD_POSITION_MARKER.length + pos_ <= end_) {
    fileIn_.skipBytes(Protobufs.KNOWN_GOOD_POSITION_MARKER.length);
  }
  pos_ = fileIn_.getPos();

  try {
    if (protoBuilder == null) {
      protoBuilder = Protobufs.getMessageBuilder(typeRef_.getRawClass());
    }
    Message.Builder builder = protoBuilder.clone();
    final boolean success = builder.mergeDelimitedFrom(fileIn_);
    if (success) {
      value.set((M) builder.build());
    }
    return success;
  } catch (InvalidProtocolBufferException e) {
    LOG.error("Invalid Protobuf exception while building " + typeRef_.getRawClass().getName(), e);
  } catch (UninitializedMessageException ume) {
    LOG.error("Uninitialized Message Exception while building " + typeRef_.getRawClass().getName(), ume);
  }
  return false;
}
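// Companion sketch (assumed writer side): mergeDelimitedFrom() above consumes the
// standard protobuf length-delimited framing, which a producer emits like this
// (the `fs`, `path`, and `messages` names are assumptions):
try (FSDataOutputStream out = fs.create(path)) {
  for (Message m : messages) {
    m.writeDelimitedTo(out); // varint length prefix followed by the message bytes
  }
}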
private void readData(String path, int length, int start, int SIZE, float[] datas, FileSystem fs) {
  try {
    FSDataInputStream fileIn = fs.open(new Path(path));
    byte[] datafully = new byte[datas.length * 4];
    // positioned readFully fills the whole buffer; a bare positional read()
    // may return fewer bytes than requested
    fileIn.readFully(length * start * SIZE, datafully, 0, datafully.length);
    fileIn.close();
    // decode each big-endian 4-byte group into a float
    for (int i = 0; i < datas.length; i++) {
      datas[i] = Float.intBitsToFloat(getInt(datafully, i * 4));
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
public MutilCharRecordReader(FileSplit inputSplit, Configuration job) throws IOException {
  maxLineLength = job.getInt("mapred.mutilCharRecordReader.maxlength", Integer.MAX_VALUE);
  start = inputSplit.getStart();
  end = start + inputSplit.getLength();
  final Path file = inputSplit.getPath();
  compressionCodecs = new CompressionCodecFactory(job);
  final CompressionCodec codec = compressionCodecs.getCodec(file);
  // open the file system
  FileSystem fs = file.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(file);
  boolean skipFirstLine = false;
  if (codec != null) {
    lineReader = new LineReader(codec.createInputStream(fileIn), job);
    end = Long.MAX_VALUE;
  } else {
    if (start != 0) {
      // Every split except the first starts mid-line; back up one byte and skip
      // the partial first line, which the previous split reads past its own end.
      skipFirstLine = true;
      --start;
      fileIn.seek(start);
    }
    lineReader = new LineReader(fileIn, job);
  }
  if (skipFirstLine) {
    start += lineReader.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
  }
  this.pos = start;
}
@Override
public ModelInput<StringBuilder> createInput(
    Class<? extends StringBuilder> dataType,
    FileSystem fileSystem,
    Path path,
    long offset,
    long fragmentSize,
    Counter counter) throws IOException, InterruptedException {
  FileSystem fs = FileSystem.get(path.toUri(), getConf());
  FSDataInputStream in = fs.open(path);
  boolean succeed = false;
  try {
    in.seek(offset);
    ModelInput<StringBuilder> result = format.createInput(
        dataType, path.toString(), new CountInputStream(in, counter), offset, fragmentSize);
    succeed = true;
    return result;
  } finally {
    if (!succeed) {
      in.close();
    }
  }
}
public static void main(String args[]) throws Exception {
  if (args.length != 2) {
    System.err.println("arguments: input-dir output-file");
    System.exit(1);
  }

  FileSystem fs = FileSystem.get(confHadoop);
  Path inPath = new Path(args[0]);
  Path outPath = new Path(args[1] + "/dataset");
  FSDataInputStream in = null;
  SequenceFile.Writer writer = null;
  List<Path> files = listFiles(inPath, jpegFilter);
  try {
    writer = SequenceFile.createWriter(fs, confHadoop, outPath, Text.class, BytesWritable.class);
    for (Path p : files) {
      in = fs.open(p);
      // size the buffer from the file status; available() only reports what can
      // be read without blocking, not the file length
      byte buffer[] = new byte[(int) fs.getFileStatus(p).getLen()];
      in.readFully(buffer);
      writer.append(new Text(p.getName()), new BytesWritable(buffer));
      in.close();
    }
  } finally {
    IOUtils.closeStream(writer);
  }
}
public static ProcedureWALTrailer readTrailer(FSDataInputStream stream, long startPos, long size)
    throws IOException {
  // Beginning of the trailer: 17 = 1 byte version + 8 byte magic + 8 byte offset
  long trailerPos = size - 17;

  if (trailerPos < startPos) {
    throw new InvalidWALDataException("Missing trailer: size=" + size + " startPos=" + startPos);
  }

  stream.seek(trailerPos);
  int version = stream.read();
  if (version != TRAILER_VERSION) {
    throw new InvalidWALDataException(
        "Invalid Trailer version. got " + version + " expected " + TRAILER_VERSION);
  }

  long magic = StreamUtils.readLong(stream);
  if (magic != TRAILER_MAGIC) {
    throw new InvalidWALDataException(
        "Invalid Trailer magic. got " + magic + " expected " + TRAILER_MAGIC);
  }

  long trailerOffset = StreamUtils.readLong(stream);
  stream.seek(trailerOffset);

  ProcedureWALEntry entry = readEntry(stream);
  if (entry.getType() != ProcedureWALEntry.Type.PROCEDURE_WAL_EOF) {
    throw new InvalidWALDataException("Invalid Trailer begin");
  }

  ProcedureWALTrailer trailer = ProcedureWALTrailer.newBuilder()
      .setVersion(version)
      .setTrackerPos(stream.getPos())
      .build();
  return trailer;
}
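// Hedged companion sketch of the writer side implied by the reader above: the
// trailer occupies the last 17 bytes of the file, laid out as
//   [EOF entry ...][1-byte version][8-byte TRAILER_MAGIC][8-byte offset of the EOF entry]
// (the `out` stream, `eofEntry`, and writeEntry helper are assumptions):
long trackerPos = out.getPos();            // where the PROCEDURE_WAL_EOF entry starts
writeEntry(out, eofEntry);                 // terminal PROCEDURE_WAL_EOF entry
out.write(TRAILER_VERSION);                // 1 byte
StreamUtils.writeLong(out, TRAILER_MAGIC); // 8 bytes
StreamUtils.writeLong(out, trackerPos);    // 8 bytes pointing back at the EOF entry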
public static void readHiveResult(String path, OutputStreamWriter outStream, Configuration conf)
    throws IOException {
  FileSystem fs = FileSystem.get(conf);
  Path dir = new Path(path);
  if (!fs.exists(dir)) {
    throw new IOException("cannot find path: " + path);
  }
  FileStatus[] filelist = fs.listStatus(dir);
  long bytesRead = 0L;
  long maxsize = 1024L * 1024 * 1024 * 10;
  for (FileStatus f : filelist) {
    if (!f.isDir() && !f.getPath().getName().startsWith("_")) {
      FSDataInputStream in = fs.open(f.getPath());
      BufferedReader bf = new BufferedReader(new InputStreamReader(in));
      String line;
      while ((line = bf.readLine()) != null) {
        bytesRead += line.getBytes().length;
        outStream.write(line.replaceAll("\001", ",").replaceAll("\t", ","));
        outStream.write("\r\n");
        if (bytesRead >= maxsize) {
          bf.close();
          in.close();
          return;
        }
      }
      bf.close();
      in.close();
    }
  }
}
private boolean readUntilMatch(byte[] match, boolean withinBlock) throws IOException {
  int i = 0;
  while (true) {
    int b = fsin.read();
    // end of file:
    if (b == -1) {
      return false;
    }
    // save to buffer:
    if (withinBlock) {
      buffer.write(b);
    }
    // check if we're matching:
    if (b == match[i]) {
      i++;
      if (i >= match.length) {
        return true;
      }
    } else {
      i = 0;
    }
    // see if we've passed the stop point:
    if (!withinBlock && i == 0 && fsin.getPos() >= end) {
      return false;
    }
  }
}
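// Hedged usage sketch: readUntilMatch() above is the scanning primitive used by
// XML-style block record readers. With hypothetical start/end tags, one record
// is extracted roughly like this (buffer is the reader's ByteArrayOutputStream):
byte[] startTag = "<page>".getBytes(StandardCharsets.UTF_8);
byte[] endTag = "</page>".getBytes(StandardCharsets.UTF_8);
if (readUntilMatch(startTag, false)) {   // skip ahead to the next opening tag
  buffer.write(startTag);                // keep the tag itself in the record
  if (readUntilMatch(endTag, true)) {    // buffer everything up to the closing tag
    String record = buffer.toString("UTF-8");
    buffer.reset();
  }
}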
public FSDataInputStream open(Path file, int bufferSize) throws IOException {
  FTPClient client = connect();
  Path workDir = new Path(client.printWorkingDirectory());
  Path absolute = makeAbsolute(workDir, file);
  FileStatus fileStat = getFileStatus(client, absolute);
  if (fileStat.isDirectory()) {
    disconnect(client);
    throw new IOException("Path " + file + " is a directory.");
  }
  client.allocate(bufferSize);
  Path parent = absolute.getParent();
  // Change to the parent directory on the server. Only then can we read the file
  // on the server by opening an InputStream. As a side effect the working
  // directory on the server is changed to the parent directory of the file.
  // The FTP client connection is closed when close() is called on the
  // FSDataInputStream.
  client.changeWorkingDirectory(parent.toUri().getPath());
  InputStream is = client.retrieveFileStream(file.getName());
  FSDataInputStream fis = new FSDataInputStream(new FTPInputStream(is, client, statistics));
  if (!FTPReply.isPositivePreliminary(client.getReplyCode())) {
    // The ftpClient is in an inconsistent state. Must close the stream,
    // which in turn will log out and disconnect from the FTP server.
    fis.close();
    throw new IOException("Unable to open file: " + file + ", Aborting");
  }
  return fis;
}
static long readHdfsFile(FileSystem fs, Path p, long length, Boolean dropBehind) throws Exception {
  FSDataInputStream fis = null;
  long totalRead = 0;
  try {
    fis = fs.open(p);
    if (dropBehind != null) {
      fis.setDropBehind(dropBehind);
    }
    byte buf[] = new byte[8196];
    while (length > 0) {
      int amt = (length > buf.length) ? buf.length : (int) length;
      int ret = fis.read(buf, 0, amt);
      if (ret == -1) {
        return totalRead;
      }
      totalRead += ret;
      length -= ret;
    }
    // the requested number of bytes was read in full; without this return the
    // success path would fall through to the exception below
    return totalRead;
  } catch (IOException e) {
    LOG.error("ioexception", e);
  } finally {
    if (fis != null) {
      fis.close();
    }
  }
  throw new RuntimeException("unreachable");
}
@Test(timeout = 120000)
public void testSeekAfterSetDropBehind() throws Exception {
  // start a cluster
  LOG.info("testSeekAfterSetDropBehind");
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  String TEST_PATH = "/test";
  int TEST_PATH_LEN = MAX_TEST_FILE_LEN;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    createHdfsFile(fs, new Path(TEST_PATH), TEST_PATH_LEN, false);
    // verify that we can seek after setDropBehind
    FSDataInputStream fis = fs.open(new Path(TEST_PATH));
    try {
      Assert.assertTrue(fis.read() != -1); // create BlockReader
      fis.setDropBehind(false); // clear BlockReader
      fis.seek(2); // seek
    } finally {
      fis.close();
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
public void readFile(String file) throws IOException {
  Configuration conf = new Configuration();
  conf.addResource(new Path("/opt/hadoop-0.20.0/conf/core-site.xml"));

  FileSystem fileSystem = FileSystem.get(conf);

  Path path = new Path(file);
  if (!fileSystem.exists(path)) {
    System.out.println("File " + file + " does not exist");
    return;
  }

  FSDataInputStream in = fileSystem.open(path);

  String filename = file.substring(file.lastIndexOf('/') + 1, file.length());
  OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(filename)));

  byte[] b = new byte[1024];
  int numBytes = 0;
  while ((numBytes = in.read(b)) > 0) {
    out.write(b, 0, numBytes);
  }

  in.close();
  out.close();
  // note: this closes the shared, cached FileSystem instance for this URI;
  // avoid it if other code in the process still uses the same FileSystem
  fileSystem.close();
}
public static void downloadHdfs(String srcfilePath, String destFilePath) {
  try {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(srcfilePath), conf);
    FSDataInputStream hdfsInStream = fs.open(new Path(srcfilePath));
    File dstFile = new File(destFilePath);
    if (!dstFile.getParentFile().exists()) {
      dstFile.getParentFile().mkdirs();
    }
    OutputStream out = new FileOutputStream(destFilePath);
    byte[] ioBuffer = new byte[1024];
    int readLen = hdfsInStream.read(ioBuffer);
    while (-1 != readLen) {
      out.write(ioBuffer, 0, readLen);
      readLen = hdfsInStream.read(ioBuffer);
    }
    out.close();
    hdfsInStream.close();
    fs.close();
  } catch (FileNotFoundException e) {
    LOG.error("[downloadHdfs]", e);
  } catch (IOException e) {
    LOG.error("[downloadHdfs]", e);
  }
}
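// Hedged alternative: when no custom buffering is needed, the same download can
// usually be expressed with the built-in FileSystem helper in one call:
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(URI.create(srcfilePath), conf);
fs.copyToLocalFile(new Path(srcfilePath), new Path(destFilePath));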
public static void createCentersSequenceFile(
    Configuration conf, FileSystem fs, String centroidsPath, String sequenceFilePath)
    throws Exception {
  Path seqFile = new Path(sequenceFilePath);
  if (fs.exists(seqFile)) {
    fs.delete(seqFile, true);
  }
  FSDataInputStream inputStream = fs.open(new Path(centroidsPath));
  // BufferedReader replaces the deprecated FSDataInputStream.readLine() and the
  // fragile available()-based loop condition
  BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, seqFile, Centroid.class, IntWritable.class);
  IntWritable value = new IntWritable(0);
  String line;
  while ((line = reader.readLine()) != null) {
    StringTokenizer tokenizer = new StringTokenizer(line, " ");
    int dim = tokenizer.countTokens() - 1;
    int clusterId = Integer.valueOf(tokenizer.nextToken());
    double[] coords = new double[dim];
    for (int i = 0; i < dim; i++) {
      coords[i] = Double.valueOf(tokenizer.nextToken());
    }
    Centroid cluster = new Centroid(clusterId, new Point(coords));
    writer.append(cluster, value);
  }
  IOUtils.closeStream(writer);
  reader.close();
}
private InputStream OpenMultiplePartsWithOffset(FileSystem fs, Path pt, long offset)
    throws IOException {
  RemoteIterator<LocatedFileStatus> rit = fs.listFiles(pt, false);
  Vector<FSDataInputStream> fileHandleList = new Vector<FSDataInputStream>();
  while (rit.hasNext()) {
    Path path = rit.next().getPath();
    String filename =
        path.toString().substring(path.getParent().toString().length(), path.toString().length());

    if (filename.startsWith("/part-")) {
      long filesize = fs.getFileStatus(path).getLen();
      if (offset < filesize) {
        FSDataInputStream handle = fs.open(path);
        if (offset > 0) {
          handle.seek(offset);
        }
        fileHandleList.add(handle);
      }
      offset -= filesize;
    }
  }
  if (fileHandleList.size() == 1) {
    return fileHandleList.get(0);
  } else if (fileHandleList.size() > 1) {
    Enumeration<FSDataInputStream> enu = fileHandleList.elements();
    return new SequenceInputStream(enu);
  } else {
    System.err.println("Error, no source file loaded. run genSeedDataset.sh first!");
    return null;
  }
}
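// Hedged usage sketch for OpenMultiplePartsWithOffset(): stream all part files
// under a hypothetical job output directory as one logical file, starting at
// offset 0 (a production caller should also handle the null return):
InputStream in = OpenMultiplePartsWithOffset(fs, new Path("/data/output"), 0L);
try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
  String line;
  while ((line = reader.readLine()) != null) {
    System.out.println(line);
  }
}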
/**
 * Reads a file line by line between two line numbers, de-duplicating the result;
 * line numbers start at 1, and -1 means unbounded.
 *
 * @param filePath the file path
 * @param beginIndex the first line number to keep
 * @param endIndex the last line number to keep
 * @return the de-duplicated lines
 */
public static List<String> readFileByLinesNoDup(String filePath, int beginIndex, int endIndex) {
  Set<String> set = new HashSet<String>();
  BufferedReader br = null;
  FSDataInputStream is = null;
  try {
    LOG.info("Reading the file contents one whole line at a time:");
    is = fs.open(new Path(filePath));
    br = new BufferedReader(new InputStreamReader(is), BUFFER_SIZE);
    String tempString = null;
    int lineindex = 0;
    if (endIndex == -1) {
      while ((tempString = br.readLine()) != null) {
        lineindex++;
        if (lineindex >= beginIndex) {
          set.add(tempString);
        }
      }
    } else {
      while ((tempString = br.readLine()) != null) {
        lineindex++;
        if ((lineindex >= beginIndex) && (lineindex <= endIndex)) {
          set.add(tempString);
        }
      }
    }
    if (is != null) {
      is.close();
    }
    if (br != null) {
      br.close();
    }
  } catch (IOException e) {
    LOG.error("Failed to read the file: " + e.getMessage());
    e.printStackTrace();
  }
  List<String> list = new ArrayList<String>(set.size());
  list.addAll(set);
  return list;
}
// to be used for testing
public WikipediaRecordReader(URL fileURL, long start, long end) throws IOException {
  this.start = start;
  this.end = end;
  Path path = new Path("file://", fileURL.getPath());
  fsin = FileSystem.getLocal(new Configuration()).open(path);
  // position the stream at the start of the split (the original seeked to
  // `start` and then immediately back to 0, making the first seek a no-op)
  fsin.seek(start);
}
public ColumnDefinitionFile(Path path) throws IOException {
  if (path == null) {
    throw new IOException("A column file path must be specified.");
  }
  FSDataInputStream fdis = path.getFileSystem(HadoopUtils.createConfiguration()).open(path);
  load(fdis);
  fdis.close();
}
/**
 * Retrieves the graph root.
 *
 * @return the graph root
 * @throws IOException
 */
public static String getGraphRoot() throws IOException {
  Configuration conf = setupConf();
  FileSystem fs = FileSystem.get(conf);
  FSDataInputStream in = fs.open(new Path(SPATH_OUTPUT + SPATH_ROOTFILE));
  String username = in.readUTF();
  in.close();
  return username;
}
private byte[] readFile(Path inputPath, long len) throws Exception {
  FSDataInputStream fsIn = fs.open(inputPath);
  // state data will not be that "long"
  byte[] data = new byte[(int) len];
  fsIn.readFully(data);
  fsIn.close();
  return data;
}
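// Hedged variant of readFile(): when the length is not known up front, it can be
// taken from the file status (fs is assumed to be an initialized FileSystem):
FileStatus status = fs.getFileStatus(inputPath);
byte[] data = new byte[(int) status.getLen()];
try (FSDataInputStream fsIn = fs.open(inputPath)) {
  fsIn.readFully(data); // throws EOFException if the file is shorter than data.length
}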
/*
 * Fetch a file that is in a Hadoop file system. Return a local File.
 * Interruptible.
 */
private File hdfsFetch(Path fromPath, Reporter reporter) throws IOException, InterruptedException {
  UUID uniqueId = UUID.randomUUID();
  File toFile = new File(tempDir, uniqueId.toString() + "/" + fromPath.getName());
  File toDir = new File(toFile.getParent());
  if (toDir.exists()) {
    FileUtils.deleteDirectory(toDir);
  }
  toDir.mkdirs();
  Path toPath = new Path(toFile.getCanonicalPath());

  FileSystem fS = fromPath.getFileSystem(hadoopConf);
  FileSystem tofS = FileSystem.getLocal(hadoopConf);

  Throttler throttler = new Throttler((double) bytesPerSecThrottle);
  try {
    for (FileStatus fStatus : fS.globStatus(fromPath)) {
      log.info("Copying " + fStatus.getPath() + " to " + toPath);
      long bytesSoFar = 0;

      FSDataInputStream iS = fS.open(fStatus.getPath());
      FSDataOutputStream oS = tofS.create(toPath);

      byte[] buffer = new byte[downloadBufferSize];

      int nRead;
      while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
        // Needed to be able to be interrupted at any moment.
        if (Thread.interrupted()) {
          iS.close();
          oS.close();
          cleanDirNoExceptions(toDir);
          throw new InterruptedException();
        }
        bytesSoFar += nRead;
        oS.write(buffer, 0, nRead);
        throttler.incrementAndThrottle(nRead);
        // guard the progress calls consistently: reporter may be null, as the
        // check after the loop already assumed
        if (reporter != null && bytesSoFar >= bytesToReportProgress) {
          reporter.progress(bytesSoFar);
          bytesSoFar = 0L;
        }
      }
      if (reporter != null) {
        reporter.progress(bytesSoFar);
      }
      oS.close();
      iS.close();
    }
    return toDir;
  } catch (ClosedByInterruptException e) {
    // This can be thrown by the read() call.
    cleanDirNoExceptions(toDir);
    throw new InterruptedIOException();
  }
}