public static void createCentersSequenceFile(
    Configuration conf, FileSystem fs, String centroidsPath, String sequenceFilePath)
    throws Exception {
  Path seqFile = new Path(sequenceFilePath);
  if (fs.exists(seqFile)) {
    fs.delete(seqFile, true);
  }
  FSDataInputStream inputStream = fs.open(new Path(centroidsPath));
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, seqFile, Centroid.class, IntWritable.class);
  IntWritable value = new IntWritable(0);
  while (inputStream.available() > 0) {
    String line = inputStream.readLine();
    StringTokenizer tokenizer = new StringTokenizer(line, " ");
    int dim = tokenizer.countTokens() - 1;
    int clusterId = Integer.valueOf(tokenizer.nextToken());
    double[] coords = new double[dim];
    for (int i = 0; i < dim; i++) {
      coords[i] = Double.valueOf(tokenizer.nextToken());
    }
    Centroid cluster = new Centroid(clusterId, new Point(coords));
    writer.append(cluster, value);
  }
  IOUtils.closeStream(writer);
  inputStream.close();
}
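FSDataInputStream.readLine() is inherited from DataInputStream and is deprecated, so here is a minimal sketch of the same parsing loop driven by a BufferedReader instead; it reuses fs, centroidsPath, writer, value, Centroid and Point from the snippet above and is an illustration, not the original project's code.

// Sketch only: line-oriented read of the centroids file without the deprecated readLine().
// Reuses fs, centroidsPath, writer, value, Centroid and Point from the method above.
try (BufferedReader reader = new BufferedReader(
    new InputStreamReader(fs.open(new Path(centroidsPath)), StandardCharsets.UTF_8))) {
  String line;
  while ((line = reader.readLine()) != null) {
    StringTokenizer tokenizer = new StringTokenizer(line, " ");
    int dim = tokenizer.countTokens() - 1;
    int clusterId = Integer.parseInt(tokenizer.nextToken());
    double[] coords = new double[dim];
    for (int i = 0; i < dim; i++) {
      coords[i] = Double.parseDouble(tokenizer.nextToken());
    }
    writer.append(new Centroid(clusterId, new Point(coords)), value);
  }
}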
@Override
public boolean next(LongWritable key, ProtobufWritable<M> value) throws IOException {
  if (pos_ > end_ || fileIn_.available() <= 0) {
    return false;
  }
  key.set(pos_);
  if (Protobufs.KNOWN_GOOD_POSITION_MARKER.length + pos_ <= end_) {
    fileIn_.skipBytes(Protobufs.KNOWN_GOOD_POSITION_MARKER.length);
  }
  pos_ = fileIn_.getPos();
  try {
    if (protoBuilder == null) {
      protoBuilder = Protobufs.getMessageBuilder(typeRef_.getRawClass());
    }
    Message.Builder builder = protoBuilder.clone();
    final boolean success = builder.mergeDelimitedFrom(fileIn_);
    if (success) {
      value.set((M) builder.build());
    }
    return success;
  } catch (InvalidProtocolBufferException e) {
    LOG.error(
        "Invalid Protobuf exception while building " + typeRef_.getRawClass().getName(), e);
  } catch (UninitializedMessageException ume) {
    LOG.error(
        "Uninitialized Message Exception while building " + typeRef_.getRawClass().getName(),
        ume);
  }
  return false;
}
public static void main(String args[]) throws Exception {
  if (args.length != 2) {
    System.err.println("arguments: input-dir output-file");
    System.exit(1);
  }
  FileSystem fs = FileSystem.get(confHadoop);
  Path inPath = new Path(args[0]);
  Path outPath = new Path(args[1] + "/dataset");
  FSDataInputStream in = null;
  SequenceFile.Writer writer = null;
  List<Path> files = listFiles(inPath, jpegFilter);
  try {
    writer = SequenceFile.createWriter(fs, confHadoop, outPath, Text.class, BytesWritable.class);
    for (Path p : files) {
      in = fs.open(p);
      // sizes the buffer from available(), i.e. the bytes the stream reports as readable
      // without blocking
      byte buffer[] = new byte[in.available()];
      in.readFully(buffer);
      writer.append(new Text(p.getName()), new BytesWritable(buffer));
      in.close();
    }
  } finally {
    IOUtils.closeStream(writer);
  }
}
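available() only reports how many bytes can be read without blocking, which is not guaranteed to equal the file size on every FileSystem implementation; below is a hedged sketch of the same copy loop sized from FileStatus instead, reusing fs, files and writer from the snippet above.

// Sketch: size each buffer from the file's reported length rather than from available().
// Reuses fs, files and writer from the method above; assumes each image fits in memory.
for (Path p : files) {
  int len = (int) fs.getFileStatus(p).getLen();
  byte[] buffer = new byte[len];
  try (FSDataInputStream in = fs.open(p)) {
    in.readFully(buffer);
  }
  writer.append(new Text(p.getName()), new BytesWritable(buffer));
}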
private String retrieveLineSeparator(FSDataInputStream fis) throws IOException {
  char current;
  String lineSeparator = "";
  while (fis.available() > 0) {
    current = (char) fis.read();
    if ((current == '\n') || (current == '\r')) {
      lineSeparator += current;
      if (fis.available() > 0) {
        char next = (char) fis.read();
        // Only pair the second character when it completes a two-character separator
        // ("\r\n" or "\n\r"); two identical characters are two separate line endings.
        if (next != current && ((next == '\r') || (next == '\n'))) {
          lineSeparator += next;
        }
      }
      return lineSeparator;
    }
  }
  return null;
}
@Override
public int available() throws IOException {
  try {
    return in.available();
  } catch (FileNotFoundException e) {
    return tryOpen().available();
  } catch (NullPointerException e) {
    // HDFS 1.x - DFSInputStream.getBlockAt()
    return tryOpen().available();
  } catch (AssertionError e) {
    // assert in HDFS 1.x - DFSInputStream.getBlockAt()
    return tryOpen().available();
  }
}
/**
 * Read the side file to get the last flush length.
 *
 * @param fs the file system to use
 * @param deltaFile the path of the delta file
 * @return the maximum size of the file to use
 * @throws IOException
 */
private static long getLastFlushLength(FileSystem fs, Path deltaFile) throws IOException {
  Path lengths = OrcRecordUpdater.getSideFile(deltaFile);
  long result = Long.MAX_VALUE;
  try {
    FSDataInputStream stream = fs.open(lengths);
    result = -1;
    while (stream.available() > 0) {
      result = stream.readLong();
    }
    stream.close();
    return result;
  } catch (IOException ioe) {
    return result;
  }
}
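On an HDFS stream, available() is only a hint of what can be read without blocking, so here is a sketch that walks the side file by its reported length rather than by available(). The getSideFile() helper and the -1 / Long.MAX_VALUE conventions are carried over from the snippet above; the method name is hypothetical and this is not the Hive implementation.

// Sketch: derive how many longs to read from the side file's length instead of available().
// getSideFile() and the -1 / Long.MAX_VALUE sentinels follow the method above; the name
// getLastFlushLengthByFileLen is hypothetical.
private static long getLastFlushLengthByFileLen(FileSystem fs, Path deltaFile)
    throws IOException {
  Path lengths = OrcRecordUpdater.getSideFile(deltaFile);
  try {
    long fileLen = fs.getFileStatus(lengths).getLen();
    long result = -1;
    try (FSDataInputStream stream = fs.open(lengths)) {
      for (long consumed = 0; consumed + Long.BYTES <= fileLen; consumed += Long.BYTES) {
        result = stream.readLong();
      }
    }
    return result;
  } catch (FileNotFoundException fnf) {
    // No side file: fall back to "use the whole delta file", as above.
    return Long.MAX_VALUE;
  }
}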
private void loadUnforwardedCounts() throws IOException {
  unforwardedCounts = new HashMap<Unforwarded, Long>();
  FSDataInputStream in = getHdfs().open(getUnforwardedCountsPath());
  while (in.available() > 0) {
    String u = in.readUTF();
    Long count = in.readLong();
    unforwardedCounts.put(Unforwarded.valueOf(u), count);
  }
  in.close();
}
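For context, a hypothetical sketch of the matching writer, making explicit the on-disk layout the readUTF()/readLong() loop above expects (one UTF-encoded enum name followed by one long per entry); getHdfs(), getUnforwardedCountsPath() and Unforwarded are the snippet's own helpers, while saveUnforwardedCounts is an invented name.

// Hypothetical counterpart writer: one writeUTF(name) followed by one writeLong(count) per
// entry, i.e. exactly the record layout the readUTF()/readLong() loop above consumes.
private void saveUnforwardedCounts() throws IOException {
  FSDataOutputStream out = getHdfs().create(getUnforwardedCountsPath(), true);
  try {
    for (Map.Entry<Unforwarded, Long> entry : unforwardedCounts.entrySet()) {
      out.writeUTF(entry.getKey().name());
      out.writeLong(entry.getValue());
    }
  } finally {
    out.close();
  }
}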
// @Test
// Check file/read write
public void testFileIO() throws Exception {
  Path subDir1 = new Path("dir.1");
  Path file1 = new Path("dir.1/foo.1");

  kosmosFileSystem.mkdirs(baseDir);
  assertTrue(kosmosFileSystem.isDirectory(baseDir));
  kosmosFileSystem.setWorkingDirectory(baseDir);

  kosmosFileSystem.mkdirs(subDir1);

  FSDataOutputStream s1 =
      kosmosFileSystem.create(file1, true, 4096, (short) 1, (long) 4096, null);

  int bufsz = 4096;
  byte[] data = new byte[bufsz];

  for (int i = 0; i < data.length; i++) data[i] = (byte) (i % 16);

  // write 4 bytes and read them back; read API should return a byte per call
  s1.write(32);
  s1.write(32);
  s1.write(32);
  s1.write(32);
  // write some data
  s1.write(data, 0, data.length);
  // flush out the changes
  s1.close();

  // Read the stuff back and verify it is correct
  FSDataInputStream s2 = kosmosFileSystem.open(file1, 4096);
  int v;
  long nread = 0;

  v = s2.read();
  assertEquals(v, 32);
  v = s2.read();
  assertEquals(v, 32);
  v = s2.read();
  assertEquals(v, 32);
  v = s2.read();
  assertEquals(v, 32);

  assertEquals(s2.available(), data.length);

  byte[] buf = new byte[bufsz];
  s2.read(buf, 0, buf.length);
  nread = s2.getPos();

  for (int i = 0; i < data.length; i++) assertEquals(data[i], buf[i]);

  assertEquals(s2.available(), 0);

  s2.close();

  // append some data to the file
  try {
    s1 = kosmosFileSystem.append(file1);
    for (int i = 0; i < data.length; i++) data[i] = (byte) (i % 17);
    // write the data
    s1.write(data, 0, data.length);
    // flush out the changes
    s1.close();

    // read it back and validate
    s2 = kosmosFileSystem.open(file1, 4096);
    s2.seek(nread);
    s2.read(buf, 0, buf.length);
    for (int i = 0; i < data.length; i++) assertEquals(data[i], buf[i]);

    s2.close();
  } catch (Exception e) {
    System.out.println("append isn't supported by the underlying fs");
  }

  kosmosFileSystem.delete(file1, true);
  assertFalse(kosmosFileSystem.exists(file1));
  kosmosFileSystem.delete(subDir1, true);
  assertFalse(kosmosFileSystem.exists(subDir1));
  kosmosFileSystem.delete(baseDir, true);
  assertFalse(kosmosFileSystem.exists(baseDir));
}
public void map(Text key, Text value, Context context) throws InterruptedException, IOException {
  String filename = key.toString();
  String json = value.toString();

  // Make sure the input is valid
  if (!(filename.isEmpty() || json.isEmpty())) {
    // Change the json-type feature to Mat-type feature
    Mat descriptor = json2mat(json);
    if (descriptor != null) {
      // Read the query feature from the cache in Hadoop
      Mat query_features;
      String pathStr = context.getConfiguration().get("featureFilePath");
      FileSystem fs = FileSystem.get(context.getConfiguration());
      FSDataInputStream fsDataInputStream = fs.open(new Path(pathStr));
      StringBuilder sb = new StringBuilder();

      // Use a buffer to read the query_feature
      int remain = fsDataInputStream.available();
      while (remain > 0) {
        int read;
        byte[] buf = new byte[BUF_SIZE];
        read = fsDataInputStream.read(buf, fsDataInputStream.available() - remain, BUF_SIZE);
        sb.append(new String(buf, 0, read, StandardCharsets.UTF_8));
        remain = remain - read;
        System.out.println("remain:" + remain + "\tread:" + read + "\tsb.size:" + sb.length());
      }

      // Read the query_feature line by line
      // Scanner sc = new Scanner(fsDataInputStream, "UTF-8");
      // StringBuilder sb = new StringBuilder();
      // while (sc.hasNextLine()) {
      //   sb.append(sc.nextLine());
      // }
      // String query_json = sb.toString();
      // String query_json = new String(buf, StandardCharsets.UTF_8);
      String query_json = sb.toString();
      fsDataInputStream.close();
      query_features = json2mat(query_json);

      // Get the similarity of the current database image against the query image
      DescriptorMatcher matcher = DescriptorMatcher.create(DescriptorMatcher.FLANNBASED);
      MatOfDMatch matches = new MatOfDMatch();

      // Ensure the two features have the same number of cols
      // (the features extracted are all 128 cols, at least in this case)
      if (query_features.cols() == descriptor.cols()) {
        matcher.match(query_features, descriptor, matches);
        DMatch[] dMatches = matches.toArray();

        // Calculate the max/min distances
        // double max_dist = Double.MAX_VALUE;
        // double min_dist = Double.MIN_VALUE;
        double max_dist = 0;
        double min_dist = 100;
        for (int i = 0; i < dMatches.length; i++) {
          double dist = dMatches[i].distance;
          if (min_dist > dist) min_dist = dist;
          if (max_dist < dist) max_dist = dist;
        }

        // Only distances <= threshold are good matches
        double threshold = max_dist * THRESHOLD_FACTOR;
        // double threshold = min_dist * 2;
        LinkedList<DMatch> goodMatches = new LinkedList<DMatch>();
        for (int i = 0; i < dMatches.length; i++) {
          if (dMatches[i].distance <= threshold) {
            goodMatches.addLast(dMatches[i]);
          }
        }

        // Get the ratio of good_matches to all_matches
        double ratio = (double) goodMatches.size() / (double) dMatches.length;

        System.out.println("*** current_record_filename:" + filename + " ***");
        System.out.println("feature:" + descriptor + "\nquery_feature:" + query_features);
        System.out.println(
            "min_dist of keypoints:" + min_dist + " max_dist of keypoints:" + max_dist);
        System.out.println(
            "total_matches:" + dMatches.length + "\tgood_matches:" + goodMatches.size());
        // System.out.println("type:" + descriptor.type() + " channels:" + descriptor.channels()
        //     + " rows:" + descriptor.rows() + " cols:" + descriptor.cols());
        // System.out.println("qtype:" + query_features.type() + " qchannels:"
        //     + query_features.channels() + " qrows:" + query_features.rows()
        //     + " qcols:" + query_features.cols());
        System.out.println();

        if (ratio > PERCENTAGE_THRESHOLD) {
          // Key:1  Value:filename|ratio
          context.write(ONE, new Text(filename + "|" + ratio));
          // context.write(ONE, new Text(filename + "|"
          //     + String.valueOf(goodMatches.size())));
        }
      } else {
        System.out.println("The size of the features are not equal");
      }
    } else {
      // a null pointer, do nothing
      System.out.println("A broken/null feature:" + filename);
      System.out.println();
    }
  }
}
@Override
public int available() throws IOException {
  return fsDataInputStream.available();
}
public void testFileIO() throws Exception {
  Path subDir1 = new Path("tfio_dir.1");
  Path file1 = new Path("tfio_dir.1/foo.1");
  Path baseDir = new Path("tfio_testDirs1");

  fs.mkdirs(baseDir);
  assertTrue(fs.isDirectory(baseDir));
  // fs.setWorkingDirectory(baseDir);

  fs.mkdirs(subDir1);

  FSDataOutputStream s1 = fs.create(file1, true, 4096, (short) 1, (long) 4096, null);

  int bufsz = 4096;
  byte[] data = new byte[bufsz];

  for (int i = 0; i < data.length; i++) data[i] = (byte) (i % 16);

  // write 4 bytes and read them back; read API should return a byte per call
  s1.write(32);
  s1.write(32);
  s1.write(32);
  s1.write(32);
  // write some data
  s1.write(data, 0, data.length);
  // flush out the changes
  s1.close();

  // Read the stuff back and verify it is correct
  FSDataInputStream s2 = fs.open(file1, 4096);
  int v;

  v = s2.read();
  assertEquals(v, 32);
  v = s2.read();
  assertEquals(v, 32);
  v = s2.read();
  assertEquals(v, 32);
  v = s2.read();
  assertEquals(v, 32);

  assertEquals(s2.available(), data.length);

  byte[] buf = new byte[bufsz];
  s2.read(buf, 0, buf.length);
  for (int i = 0; i < data.length; i++) assertEquals(data[i], buf[i]);

  assertEquals(s2.available(), 0);

  s2.close();

  fs.delete(file1, true);
  assertFalse(fs.exists(file1));
  fs.delete(subDir1, true);
  assertFalse(fs.exists(subDir1));
  fs.delete(baseDir, true);
  assertFalse(fs.exists(baseDir));
  fs.delete(subDir1);
  fs.delete(file1);
  fs.delete(baseDir);
}
// in rare circumstances, two logs can be left uncapped (lacking a commit at the end signifying
// that all the content in the log was committed)
@Test
public void testRecoveryMultipleLogs() throws Exception {
  try {
    DirectUpdateHandler2.commitOnClose = false;
    final Semaphore logReplay = new Semaphore(0);
    final Semaphore logReplayFinish = new Semaphore(0);

    UpdateLog.testing_logReplayHook =
        new Runnable() {
          @Override
          public void run() {
            try {
              assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
            } catch (Exception e) {
              throw new RuntimeException(e);
            }
          }
        };

    UpdateLog.testing_logReplayFinishHook =
        new Runnable() {
          @Override
          public void run() {
            logReplayFinish.release();
          }
        };

    String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();

    clearIndex();
    assertU(commit());

    assertU(adoc("id", "AAAAAA"));
    assertU(adoc("id", "BBBBBB"));
    assertU(adoc("id", "CCCCCC"));

    h.close();

    String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
    Arrays.sort(files);
    String fname = files[files.length - 1];

    FSDataOutputStream dos = fs.append(new Path(logDir, files[files.length - 1]));
    dos.writeLong(0xffffffffffffffffL);
    dos.writeChars(
        "This should be appended to a good log file, representing a bad partially written record.");
    dos.close();

    FSDataInputStream dis = fs.open(new Path(logDir, files[files.length - 1]));
    byte[] content = new byte[(int) dis.available()];
    dis.readFully(content);
    dis.close();

    // Now make a newer log file with just the IDs changed.
    // NOTE: this may not work if log format changes too much!
    findReplace(
        "AAAAAA".getBytes(StandardCharsets.UTF_8),
        "aaaaaa".getBytes(StandardCharsets.UTF_8),
        content);
    findReplace(
        "BBBBBB".getBytes(StandardCharsets.UTF_8),
        "bbbbbb".getBytes(StandardCharsets.UTF_8),
        content);
    findReplace(
        "CCCCCC".getBytes(StandardCharsets.UTF_8),
        "cccccc".getBytes(StandardCharsets.UTF_8),
        content);

    // WARNING... assumes format of .00000n where n is less than 9
    long logNumber = Long.parseLong(fname.substring(fname.lastIndexOf(".") + 1));
    String fname2 =
        String.format(
            Locale.ROOT, UpdateLog.LOG_FILENAME_PATTERN, UpdateLog.TLOG_NAME, logNumber + 1);

    dos = fs.create(new Path(logDir, fname2), (short) 1);
    dos.write(content);
    dos.close();

    logReplay.release(1000);
    logReplayFinish.drainPermits();
    // this is what the corrupted log currently produces... subject to change.
    ignoreException("OutOfBoundsException");
    createCore();
    assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
    resetExceptionIgnores();
    assertJQ(req("q", "*:*"), "/response/numFound==6");
  } finally {
    DirectUpdateHandler2.commitOnClose = true;
    UpdateLog.testing_logReplayHook = null;
    UpdateLog.testing_logReplayFinishHook = null;
  }
}