private void verifyOutputTextFiles(FileSystem fs, Configuration conf, String dir,
    String prefix, List<String> bodies) throws IOException {
  int found = 0;
  int expected = bodies.size();

  for (String outputFile : getAllFiles(dir)) {
    String name = (new File(outputFile)).getName();
    if (name.startsWith(prefix)) {
      FSDataInputStream input = fs.open(new Path(outputFile));
      BufferedReader reader = new BufferedReader(new InputStreamReader(input));
      String body = null;
      while ((body = reader.readLine()) != null) {
        bodies.remove(body);
        found++;
      }
      reader.close();
    }
  }

  Assert.assertTrue("Found = " + found + ", Expected = " + expected
      + ", Left = " + bodies.size() + " " + bodies,
      bodies.size() == 0);
}
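A minimal sketch of how the verifier above might be invoked from a test; the local FileSystem, output directory, prefix, and expected bodies here are illustrative and not part of the original test class.

Configuration conf = new Configuration();
FileSystem localFs = FileSystem.getLocal(conf);
List<String> expectedBodies = new ArrayList<>(Arrays.asList("record-1", "record-2"));
// Every line read back removes a matching entry from expectedBodies;
// the assertion fires if any expected body was never found.
verifyOutputTextFiles(localFs, conf, "/tmp/sink-output", "part-", expectedBodies);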
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Path pt = new Path("/user/yao/query/query");
  FileSystem fs = FileSystem.get(new Configuration());
  BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(pt)));
  String line = br.readLine();
  String[] keywords = line.split(",");
  k0 = keywords[0];
  k1 = keywords[1];
  k2 = keywords[2];
  br.close();
}
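For context, a minimal sketch of producing the query file that this setup() expects: a single comma-separated line of keywords at /user/yao/query/query. The keyword values below are illustrative.

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
try (FSDataOutputStream out = fs.create(new Path("/user/yao/query/query"), true)) {
  // One line, three comma-separated keywords -> k0, k1, k2 in setup()
  out.writeBytes("hadoop,mapreduce,hdfs");
}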
HarFsInputStream(FileSystem fs, Path path, long start, long length, int bufferSize)
    throws IOException {
  if (length < 0) {
    throw new IllegalArgumentException("Negative length [" + length + "]");
  }
  underLyingStream = fs.open(path, bufferSize);
  underLyingStream.seek(start);
  // the start of this file in the part file
  this.start = start;
  // the position pointer in the part file
  this.position = start;
  // the end pointer in the part file
  this.end = start + length;
}
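A minimal sketch (an assumption, not the original class body) of a read() that honors the start/position/end bookkeeping set up by this constructor, so reads never run past the end of this file's slice of the part file.

@Override
public synchronized int read() throws IOException {
  if (position >= end) {
    return -1; // logical end of the archived file
  }
  int b = underLyingStream.read();
  if (b >= 0) {
    position++; // advance our pointer within the part file
  }
  return b;
}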
public void map(Text key, Text value, Context context)
    throws InterruptedException, IOException {
  String filename = key.toString();
  String json = value.toString();

  // Make sure the input is valid
  if (!(filename.isEmpty() || json.isEmpty())) {
    // Change the json-type feature to Mat-type feature
    Mat descriptor = json2mat(json);
    if (descriptor != null) {
      // Read the query feature from the cache in Hadoop
      Mat query_features;
      String pathStr = context.getConfiguration().get("featureFilePath");
      FileSystem fs = FileSystem.get(context.getConfiguration());
      FSDataInputStream fsDataInputStream = fs.open(new Path(pathStr));
      StringBuilder sb = new StringBuilder();

      // Use a buffer to read the query_feature
      int remain = fsDataInputStream.available();
      while (remain > 0) {
        byte[] buf = new byte[BUF_SIZE];
        // Read into the start of the fresh buffer (the offset argument is an
        // offset into buf, not into the stream).
        int read = fsDataInputStream.read(buf, 0, BUF_SIZE);
        if (read < 0) {
          break; // end of stream reached early
        }
        sb.append(new String(buf, 0, read, StandardCharsets.UTF_8));
        remain = remain - read;
        System.out.println("remain:" + remain + "\tread:" + read + "\tsb.size:" + sb.length());
      }

      // Read the query_feature line by line
      // Scanner sc = new Scanner(fsDataInputStream, "UTF-8");
      // StringBuilder sb = new StringBuilder();
      // while (sc.hasNextLine()) {
      //   sb.append(sc.nextLine());
      // }
      // String query_json = sb.toString();
      // String query_json = new String(buf, StandardCharsets.UTF_8);

      String query_json = sb.toString();
      fsDataInputStream.close();
      query_features = json2mat(query_json);

      // Get the similarity of the current database image against the query image
      DescriptorMatcher matcher = DescriptorMatcher.create(DescriptorMatcher.FLANNBASED);
      MatOfDMatch matches = new MatOfDMatch();

      // Ensure the two features have the same number of cols
      // (the features extracted are all 128 cols, at least in this case)
      if (query_features.cols() == descriptor.cols()) {
        matcher.match(query_features, descriptor, matches);
        DMatch[] dMatches = matches.toArray();

        // Calculate the max/min distances
        // double max_dist = Double.MAX_VALUE;
        // double min_dist = Double.MIN_VALUE;
        double max_dist = 0;
        double min_dist = 100;
        for (int i = 0; i < dMatches.length; i++) {
          double dist = dMatches[i].distance;
          if (min_dist > dist) min_dist = dist;
          if (max_dist < dist) max_dist = dist;
        }

        // Only distances ≤ threshold are good matches
        double threshold = max_dist * THRESHOLD_FACTOR;
        // double threshold = min_dist * 2;
        LinkedList<DMatch> goodMatches = new LinkedList<DMatch>();
        for (int i = 0; i < dMatches.length; i++) {
          if (dMatches[i].distance <= threshold) {
            goodMatches.addLast(dMatches[i]);
          }
        }

        // Get the ratio of good_matches to all_matches
        double ratio = (double) goodMatches.size() / (double) dMatches.length;

        System.out.println("*** current_record_filename:" + filename + " ***");
        System.out.println("feature:" + descriptor + "\nquery_feature:" + query_features);
        System.out.println("min_dist of keypoints:" + min_dist + " max_dist of keypoints:" + max_dist);
        System.out.println("total_matches:" + dMatches.length + "\tgood_matches:" + goodMatches.size());
        // System.out.println("type:" + descriptor.type() + " channels:" + descriptor.channels()
        //     + " rows:" + descriptor.rows() + " cols:" + descriptor.cols());
        // System.out.println("qtype:" + query_features.type() + " qchannels:" + query_features.channels()
        //     + " qrows:" + query_features.rows() + " qcols:" + query_features.cols());
        System.out.println();

        if (ratio > PERCENTAGE_THRESHOLD) {
          // Key:1  Value:filename|ratio
          context.write(ONE, new Text(filename + "|" + ratio));
          // context.write(ONE, new Text(filename + "|" + String.valueOf(goodMatches.size())));
        }
      } else {
        System.out.println("The size of the features are not equal");
      }
    } else {
      // a null pointer, do nothing
      System.out.println("A broken/null feature:" + filename);
      System.out.println();
    }
  }
}
private void parseMetaData() throws IOException {
  Text line = new Text();
  long read;
  FSDataInputStream in = null;
  LineReader lin = null;

  try {
    in = fs.open(masterIndexPath);
    FileStatus masterStat = fs.getFileStatus(masterIndexPath);
    masterIndexTimestamp = masterStat.getModificationTime();
    lin = new LineReader(in, getConf());
    read = lin.readLine(line);

    // the first line contains the version of the index file
    String versionLine = line.toString();
    String[] arr = versionLine.split(" ");
    version = Integer.parseInt(arr[0]);
    // make it always backwards-compatible
    if (this.version > HarFileSystem.VERSION) {
      throw new IOException(
          "Invalid version " + this.version + " expected " + HarFileSystem.VERSION);
    }

    // each line contains a hashcode range and the index file name
    String[] readStr;
    while (read < masterStat.getLen()) {
      int b = lin.readLine(line);
      read += b;
      readStr = line.toString().split(" ");
      int startHash = Integer.parseInt(readStr[0]);
      int endHash = Integer.parseInt(readStr[1]);
      stores.add(new Store(Long.parseLong(readStr[2]), Long.parseLong(readStr[3]),
          startHash, endHash));
      line.clear();
    }
  } catch (IOException ioe) {
    LOG.warn("Encountered exception ", ioe);
    throw ioe;
  } finally {
    IOUtils.cleanup(LOG, lin, in);
  }

  FSDataInputStream aIn = fs.open(archiveIndexPath);
  try {
    FileStatus archiveStat = fs.getFileStatus(archiveIndexPath);
    archiveIndexTimestamp = archiveStat.getModificationTime();
    LineReader aLin;

    // now start reading the real index file
    for (Store s : stores) {
      read = 0;
      aIn.seek(s.begin);
      aLin = new LineReader(aIn, getConf());
      while (read + s.begin < s.end) {
        int tmp = aLin.readLine(line);
        read += tmp;
        String lineFeed = line.toString();
        String[] parsed = lineFeed.split(" ");
        parsed[0] = decodeFileName(parsed[0]);
        archive.put(new Path(parsed[0]), new HarStatus(lineFeed));
        line.clear();
      }
    }
  } finally {
    IOUtils.cleanup(LOG, aIn);
  }
}
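A minimal sketch of the Store holder this method assumes; only the begin/end/startHash/endHash fields actually used above are shown, and field visibility is an assumption.

static class Store {
  public long begin;    // byte offset of this hash range inside the archive index
  public long end;      // end offset (exclusive) of the range
  public int startHash; // first file-name hash covered by the range
  public int endHash;   // last file-name hash covered by the range

  public Store(long begin, long end, int startHash, int endHash) {
    this.begin = begin;
    this.end = end;
    this.startHash = startHash;
    this.endHash = endHash;
  }
}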