public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String cur_file = ((FileSplit) context.getInputSplit()).getPath().getParent().getParent().getName(); String train_file = context.getConfiguration().get("train_file"); if (cur_file.equals(train_file)) { StringTokenizer st = new StringTokenizer(value.toString()); String word = st.nextToken(); String f_id = st.nextToken(); myKey.set(word); myVal.set(f_id); context.write(myKey, myVal); } else { StringTokenizer st = new StringTokenizer(value.toString()); String word = st.nextToken(); String f_id = st.nextToken(); StringBuilder builder = new StringBuilder(dlt); while (st.hasMoreTokens()) { String filename = st.nextToken(); String tf_idf = st.nextToken(); builder.append(filename); builder.append(dlt); builder.append(tf_idf); builder.append("\t"); } myKey.set(word); myVal.set(builder.toString()); context.write(myKey, myVal); } }
public void testInputFormat() { try { JobConf conf = new JobConf(); String TMP_DIR = System.getProperty("test.build.data", "/tmp"); Path filename = new Path("file:///" + TMP_DIR + "/tmpSeqFile"); SequenceFile.Writer sfw = SequenceFile.createWriter( FileSystem.getLocal(conf), conf, filename, ChukwaArchiveKey.class, ChunkImpl.class, SequenceFile.CompressionType.NONE, Reporter.NULL); StringBuilder buf = new StringBuilder(); int offsets[] = new int[lines.length]; for (int i = 0; i < lines.length; ++i) { buf.append(lines[i]); buf.append("\n"); offsets[i] = buf.length() - 1; } ChukwaArchiveKey key = new ChukwaArchiveKey(0, "datatype", "sname", 0); ChunkImpl val = new ChunkImpl("datatype", "sname", 0, buf.toString().getBytes(), null); val.setRecordOffsets(offsets); sfw.append(key, val); sfw.append(key, val); // write it twice sfw.close(); long len = FileSystem.getLocal(conf).getFileStatus(filename).getLen(); InputSplit split = new FileSplit(filename, 0, len, (String[]) null); ChukwaInputFormat in = new ChukwaInputFormat(); RecordReader<LongWritable, Text> r = in.getRecordReader(split, conf, Reporter.NULL); LongWritable l = r.createKey(); Text line = r.createValue(); for (int i = 0; i < lines.length * 2; ++i) { boolean succeeded = r.next(l, line); assertTrue(succeeded); assertEquals(i, l.get()); assertEquals(lines[i % lines.length], line.toString()); System.out.println("read line: " + l.get() + " " + line); } boolean succeeded = r.next(l, line); assertFalse(succeeded); } catch (IOException e) { e.printStackTrace(); fail("IO exception " + e); } }
public void map( Object key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { // id color // id color 1/0 "COLOR" String[] tokens = value.toString().split("\\s+"); IntWritable SourceId = new IntWritable(Integer.parseInt(tokens[0])); StringBuilder sb = new StringBuilder(); for (int i = 1; i < tokens.length; i++) { if (sb.length() != 0) sb.append(" "); sb.append(tokens[i]); } output.collect(SourceId, new Text(sb.toString())); }
public String toString() { StringBuilder buffer = new StringBuilder(); buffer.append(methodName); buffer.append("("); for (int i = 0; i < parameters.length; i++) { if (i != 0) buffer.append(", "); buffer.append(parameters[i]); } buffer.append(")"); buffer.append(", rpc version=" + rpcVersion); buffer.append(", client version=" + clientVersion); buffer.append(", methodsFingerPrint=" + clientMethodsHash); return buffer.toString(); }
public static String readHDFSFile(String path, Configuration conf) throws IOException { Path pt = new Path(path); FileSystem fs = FileSystem.get(pt.toUri(), conf); FSDataInputStream file = fs.open(pt); BufferedReader buffIn = new BufferedReader(new InputStreamReader(file)); StringBuilder everything = new StringBuilder(); String line; while ((line = buffIn.readLine()) != null) { everything.append(line); everything.append("\n"); } return everything.toString(); }
public String toString() { StringBuilder result = new StringBuilder(); result.append(offset); result.append(','); result.append(length); for (String h : hosts) { result.append(','); result.append(h); } return result.toString(); }
/** * Make a path relative with respect to a root path. absPath is always assumed to descend from * root. Otherwise returned path is null. */ static String makeRelative(Path root, Path absPath) { if (!absPath.isAbsolute()) { throw new IllegalArgumentException("!absPath.isAbsolute(), absPath=" + absPath); } String p = absPath.toUri().getPath(); StringTokenizer pathTokens = new StringTokenizer(p, "/"); for (StringTokenizer rootTokens = new StringTokenizer(root.toUri().getPath(), "/"); rootTokens.hasMoreTokens(); ) { if (!rootTokens.nextToken().equals(pathTokens.nextToken())) { return null; } } StringBuilder sb = new StringBuilder(); for (; pathTokens.hasMoreTokens(); ) { sb.append(pathTokens.nextToken()); if (pathTokens.hasMoreTokens()) { sb.append(Path.SEPARATOR); } } return sb.length() == 0 ? "." : sb.toString(); }
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { String[] pair = new String[2]; int count = 0; for (Text txt : values) { pair[count] = txt.toString(); count++; } // word exists in training if (count == 2) { StringTokenizer st_one, st_two; if (pair[0].contains(dlt)) { st_one = new StringTokenizer(pair[1]); st_two = new StringTokenizer(pair[0]); } else { st_one = new StringTokenizer(pair[0]); st_two = new StringTokenizer(pair[1]); } // outputting the data String f_id = st_one.nextToken(); StringBuilder builder = new StringBuilder(dlt); builder.append(f_id); builder.append(dlt); while (st_two.hasMoreTokens()) { String filename = st_two.nextToken(); String tf_idf = st_two.nextToken(); builder.append(filename); builder.append(dlt); builder.append(tf_idf); builder.append("\t"); } myVal.set(builder.toString()); context.write(key, myVal); } }
public void map(Text key, Text value, Context context) throws InterruptedException, IOException { String filename = key.toString(); String json = value.toString(); // Make sure the input is valid if (!(filename.isEmpty() || json.isEmpty())) { // Change the json-type feature to Mat-type feature Mat descriptor = json2mat(json); if (descriptor != null) { // Read the query feature from the cache in Hadoop Mat query_features; String pathStr = context.getConfiguration().get("featureFilePath"); FileSystem fs = FileSystem.get(context.getConfiguration()); FSDataInputStream fsDataInputStream = fs.open(new Path(pathStr)); StringBuilder sb = new StringBuilder(); // Use a buffer to read the query_feature int remain = fsDataInputStream.available(); while (remain > 0) { int read; byte[] buf = new byte[BUF_SIZE]; read = fsDataInputStream.read(buf, fsDataInputStream.available() - remain, BUF_SIZE); sb.append(new String(buf, 0, read, StandardCharsets.UTF_8)); remain = remain - read; System.out.println("remain:" + remain + "\tread:" + read + "\tsb.size:" + sb.length()); } // Read the query_feature line by line // Scanner sc = new Scanner(fsDataInputStream, "UTF-8"); // StringBuilder sb = new StringBuilder(); // while (sc.hasNextLine()) { // sb.append(sc.nextLine()); // } // String query_json = sb.toString(); // String query_json = new String(buf, StandardCharsets.UTF_8); String query_json = sb.toString(); fsDataInputStream.close(); query_features = json2mat(query_json); // Get the similarity of the current database image against the query image DescriptorMatcher matcher = DescriptorMatcher.create(DescriptorMatcher.FLANNBASED); MatOfDMatch matches = new MatOfDMatch(); // Ensure the two features have same length of cols (the feature extracted are all 128 // cols(at least in this case)) if (query_features.cols() == descriptor.cols()) { matcher.match(query_features, descriptor, matches); DMatch[] dMatches = matches.toArray(); // Calculate the max/min distances // double max_dist = Double.MAX_VALUE; // double min_dist = Double.MIN_VALUE; double max_dist = 0; double min_dist = 100; for (int i = 0; i < dMatches.length; i++) { double dist = dMatches[i].distance; if (min_dist > dist) min_dist = dist; if (max_dist < dist) max_dist = dist; } // Only distances ≤ threshold are good matches double threshold = max_dist * THRESHOLD_FACTOR; // double threshold = min_dist * 2; LinkedList<DMatch> goodMatches = new LinkedList<DMatch>(); for (int i = 0; i < dMatches.length; i++) { if (dMatches[i].distance <= threshold) { goodMatches.addLast(dMatches[i]); } } // Get the ratio of good_matches to all_matches double ratio = (double) goodMatches.size() / (double) dMatches.length; System.out.println("*** current_record_filename:" + filename + " ***"); System.out.println("feature:" + descriptor + "\nquery_feature:" + query_features); System.out.println( "min_dist of keypoints:" + min_dist + " max_dist of keypoints:" + max_dist); System.out.println( "total_matches:" + dMatches.length + "\tgood_matches:" + goodMatches.size()); // System.out.println("type:" + descriptor.type() + " channels:" + // descriptor.channels() + " rows:" + descriptor.rows() + " cols:" + descriptor.cols()); // System.out.println("qtype:" + query_features.type() + " // qchannels:" + query_features.channels() + " qrows:" + query_features.rows() + " // qcols:" + query_features.cols()); System.out.println(); if (ratio > PERCENTAGE_THRESHOLD) { // Key:1 Value:filename|ratio context.write(ONE, new Text(filename + "|" + ratio)); // context.write(ONE, new Text(filename + "|" + // String.valueOf(goodMatches.size()))); } } else { System.out.println("The size of the features are not equal"); } } else { // a null pointer, do nothing System.out.println("A broken/null feature:" + filename); System.out.println(); } } }
public void map( Object key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { // vid neighbors_num n1 n2 ... // vid color 1/0 "COLOR" String str = value.toString(); if (str.endsWith(COLOR)) { // color table String[] tokens = str.substring(0, str.length() - 5).split("\\s+"); int change = Integer.parseInt(tokens[2]); if (change == 1) { IntWritable SourceId = new IntWritable(Integer.parseInt(tokens[0])); StringBuilder sb = new StringBuilder(); sb.append(tokens[1]); sb.append(" "); sb.append(tokens[2]); sb.append(COLOR); output.collect(SourceId, new Text(sb.toString())); } } else { // edge table String[] tokens = value.toString().split("\\s+"); IntWritable SourceId = new IntWritable(Integer.parseInt(tokens[0])); StringBuilder sb = new StringBuilder(); for (int i = 1; i < tokens.length; i++) { if (sb.length() != 0) sb.append(" "); sb.append(tokens[i]); } output.collect(SourceId, new Text(sb.toString())); } }