public void close() throws IOException { datas.close(); if (sums != null) { sums.close(); } set(fs.verifyChecksum, null, 1, 0); }
/* * Fetch a file that is in a Hadoop file system. Return a local File. * Interruptible. */ private File hdfsFetch(Path fromPath, Reporter reporter) throws IOException, InterruptedException { UUID uniqueId = UUID.randomUUID(); File toFile = new File(tempDir, uniqueId.toString() + "/" + fromPath.getName()); File toDir = new File(toFile.getParent()); if (toDir.exists()) { FileUtils.deleteDirectory(toDir); } toDir.mkdirs(); Path toPath = new Path(toFile.getCanonicalPath()); FileSystem fS = fromPath.getFileSystem(hadoopConf); FileSystem tofS = FileSystem.getLocal(hadoopConf); Throttler throttler = new Throttler((double) bytesPerSecThrottle); try { for (FileStatus fStatus : fS.globStatus(fromPath)) { log.info("Copying " + fStatus.getPath() + " to " + toPath); long bytesSoFar = 0; FSDataInputStream iS = fS.open(fStatus.getPath()); FSDataOutputStream oS = tofS.create(toPath); byte[] buffer = new byte[downloadBufferSize]; int nRead; while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) { // Needed to being able to be interrupted at any moment. if (Thread.interrupted()) { iS.close(); oS.close(); cleanDirNoExceptions(toDir); throw new InterruptedException(); } bytesSoFar += nRead; oS.write(buffer, 0, nRead); throttler.incrementAndThrottle(nRead); if (bytesSoFar >= bytesToReportProgress) { reporter.progress(bytesSoFar); bytesSoFar = 0l; } } if (reporter != null) { reporter.progress(bytesSoFar); } oS.close(); iS.close(); } return toDir; } catch (ClosedByInterruptException e) { // This can be thrown by the method read. cleanDirNoExceptions(toDir); throw new InterruptedIOException(); } }
public void map(Text key, Text value, Context context) throws InterruptedException, IOException { String filename = key.toString(); String json = value.toString(); // Make sure the input is valid if (!(filename.isEmpty() || json.isEmpty())) { // Change the json-type feature to Mat-type feature Mat descriptor = json2mat(json); if (descriptor != null) { // Read the query feature from the cache in Hadoop Mat query_features; String pathStr = context.getConfiguration().get("featureFilePath"); FileSystem fs = FileSystem.get(context.getConfiguration()); FSDataInputStream fsDataInputStream = fs.open(new Path(pathStr)); StringBuilder sb = new StringBuilder(); // Use a buffer to read the query_feature int remain = fsDataInputStream.available(); while (remain > 0) { int read; byte[] buf = new byte[BUF_SIZE]; read = fsDataInputStream.read(buf, fsDataInputStream.available() - remain, BUF_SIZE); sb.append(new String(buf, 0, read, StandardCharsets.UTF_8)); remain = remain - read; System.out.println("remain:" + remain + "\tread:" + read + "\tsb.size:" + sb.length()); } // Read the query_feature line by line // Scanner sc = new Scanner(fsDataInputStream, "UTF-8"); // StringBuilder sb = new StringBuilder(); // while (sc.hasNextLine()) { // sb.append(sc.nextLine()); // } // String query_json = sb.toString(); // String query_json = new String(buf, StandardCharsets.UTF_8); String query_json = sb.toString(); fsDataInputStream.close(); query_features = json2mat(query_json); // Get the similarity of the current database image against the query image DescriptorMatcher matcher = DescriptorMatcher.create(DescriptorMatcher.FLANNBASED); MatOfDMatch matches = new MatOfDMatch(); // Ensure the two features have same length of cols (the feature extracted are all 128 // cols(at least in this case)) if (query_features.cols() == descriptor.cols()) { matcher.match(query_features, descriptor, matches); DMatch[] dMatches = matches.toArray(); // Calculate the max/min distances // double max_dist = Double.MAX_VALUE; // double min_dist = Double.MIN_VALUE; double max_dist = 0; double min_dist = 100; for (int i = 0; i < dMatches.length; i++) { double dist = dMatches[i].distance; if (min_dist > dist) min_dist = dist; if (max_dist < dist) max_dist = dist; } // Only distances ≤ threshold are good matches double threshold = max_dist * THRESHOLD_FACTOR; // double threshold = min_dist * 2; LinkedList<DMatch> goodMatches = new LinkedList<DMatch>(); for (int i = 0; i < dMatches.length; i++) { if (dMatches[i].distance <= threshold) { goodMatches.addLast(dMatches[i]); } } // Get the ratio of good_matches to all_matches double ratio = (double) goodMatches.size() / (double) dMatches.length; System.out.println("*** current_record_filename:" + filename + " ***"); System.out.println("feature:" + descriptor + "\nquery_feature:" + query_features); System.out.println( "min_dist of keypoints:" + min_dist + " max_dist of keypoints:" + max_dist); System.out.println( "total_matches:" + dMatches.length + "\tgood_matches:" + goodMatches.size()); // System.out.println("type:" + descriptor.type() + " channels:" + // descriptor.channels() + " rows:" + descriptor.rows() + " cols:" + descriptor.cols()); // System.out.println("qtype:" + query_features.type() + " // qchannels:" + query_features.channels() + " qrows:" + query_features.rows() + " // qcols:" + query_features.cols()); System.out.println(); if (ratio > PERCENTAGE_THRESHOLD) { // Key:1 Value:filename|ratio context.write(ONE, new Text(filename + "|" + ratio)); // context.write(ONE, new Text(filename + "|" + // String.valueOf(goodMatches.size()))); } } else { System.out.println("The size of the features are not equal"); } } else { // a null pointer, do nothing System.out.println("A broken/null feature:" + filename); System.out.println(); } } }
/** * Check how prefetch override works. * * @throws Exception IF failed. */ public void testOpenPrefetchOverride() throws Exception { create(igfsSecondary, paths(DIR, SUBDIR), paths(FILE)); // Write enough data to the secondary file system. final int blockSize = IGFS_BLOCK_SIZE; IgfsOutputStream out = igfsSecondary.append(FILE, false); int totalWritten = 0; while (totalWritten < blockSize * 2 + chunk.length) { out.write(chunk); totalWritten += chunk.length; } out.close(); awaitFileClose(igfsSecondary.asSecondary(), FILE); // Instantiate file system with overridden "seq reads before prefetch" property. Configuration cfg = new Configuration(); cfg.addResource(U.resolveIgniteUrl(PRIMARY_CFG)); int seqReads = SEQ_READS_BEFORE_PREFETCH + 1; cfg.setInt(String.format(PARAM_IGFS_SEQ_READS_BEFORE_PREFETCH, "igfs:grid@"), seqReads); FileSystem fs = FileSystem.get(new URI(PRIMARY_URI), cfg); // Read the first two blocks. Path fsHome = new Path(PRIMARY_URI); Path dir = new Path(fsHome, DIR.name()); Path subdir = new Path(dir, SUBDIR.name()); Path file = new Path(subdir, FILE.name()); FSDataInputStream fsIn = fs.open(file); final byte[] readBuf = new byte[blockSize * 2]; fsIn.readFully(0, readBuf, 0, readBuf.length); // Wait for a while for prefetch to finish (if any). IgfsMetaManager meta = igfs.context().meta(); IgfsFileInfo info = meta.info(meta.fileId(FILE)); IgfsBlockKey key = new IgfsBlockKey(info.id(), info.affinityKey(), info.evictExclude(), 2); IgniteCache<IgfsBlockKey, byte[]> dataCache = igfs.context().kernalContext().cache().jcache(igfs.configuration().getDataCacheName()); for (int i = 0; i < 10; i++) { if (dataCache.containsKey(key)) break; else U.sleep(100); } fsIn.close(); // Remove the file from the secondary file system. igfsSecondary.delete(FILE, false); // Try reading the third block. Should fail. GridTestUtils.assertThrows( log, new Callable<Object>() { @Override public Object call() throws Exception { IgfsInputStream in0 = igfs.open(FILE); in0.seek(blockSize * 2); try { in0.read(readBuf); } finally { U.closeQuiet(in0); } return null; } }, IOException.class, "Failed to read data due to secondary file system exception: /dir/subdir/file"); }