public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String cur_file =
       ((FileSplit) context.getInputSplit()).getPath().getParent().getParent().getName();
   String train_file = context.getConfiguration().get("train_file");
   if (cur_file.equals(train_file)) {
     StringTokenizer st = new StringTokenizer(value.toString());
     String word = st.nextToken();
     String f_id = st.nextToken();
     myKey.set(word);
     myVal.set(f_id);
     context.write(myKey, myVal);
   } else {
     StringTokenizer st = new StringTokenizer(value.toString());
     String word = st.nextToken();
     String f_id = st.nextToken();
     StringBuilder builder = new StringBuilder(dlt);
     while (st.hasMoreTokens()) {
       String filename = st.nextToken();
       String tf_idf = st.nextToken();
       builder.append(filename);
       builder.append(dlt);
       builder.append(tf_idf);
       builder.append("\t");
     }
     myKey.set(word);
     myVal.set(builder.toString());
     context.write(myKey, myVal);
   }
 }
예제 #2
0
    public void map(Text key, Text value, Context context)
        throws InterruptedException, IOException {

      String filename = key.toString();
      String json = value.toString();

      // Make sure the input is valid
      if (!(filename.isEmpty() || json.isEmpty())) {

        // Change the json-type feature to Mat-type feature
        Mat descriptor = json2mat(json);
        if (descriptor != null) {
          // Read the query feature from the cache in Hadoop
          Mat query_features;
          String pathStr = context.getConfiguration().get("featureFilePath");
          FileSystem fs = FileSystem.get(context.getConfiguration());
          FSDataInputStream fsDataInputStream = fs.open(new Path(pathStr));
          StringBuilder sb = new StringBuilder();

          // Use a buffer to read the query_feature
          int remain = fsDataInputStream.available();
          while (remain > 0) {
            int read;
            byte[] buf = new byte[BUF_SIZE];
            read = fsDataInputStream.read(buf, fsDataInputStream.available() - remain, BUF_SIZE);
            sb.append(new String(buf, 0, read, StandardCharsets.UTF_8));
            remain = remain - read;
            System.out.println("remain:" + remain + "\tread:" + read + "\tsb.size:" + sb.length());
          }

          // Read the query_feature line by line
          //                    Scanner sc = new Scanner(fsDataInputStream, "UTF-8");
          //                    StringBuilder sb = new StringBuilder();
          //                    while (sc.hasNextLine()) {
          //                        sb.append(sc.nextLine());
          //                    }
          //                    String query_json = sb.toString();
          //                    String query_json = new String(buf, StandardCharsets.UTF_8);

          String query_json = sb.toString();
          fsDataInputStream.close();
          query_features = json2mat(query_json);

          // Get the similarity of the current database image against the query image
          DescriptorMatcher matcher = DescriptorMatcher.create(DescriptorMatcher.FLANNBASED);
          MatOfDMatch matches = new MatOfDMatch();

          // Ensure the two features have same length of cols (the feature extracted are all 128
          // cols(at least in this case))
          if (query_features.cols() == descriptor.cols()) {

            matcher.match(query_features, descriptor, matches);
            DMatch[] dMatches = matches.toArray();

            // Calculate the max/min distances
            //                    double max_dist = Double.MAX_VALUE;
            //                    double min_dist = Double.MIN_VALUE;
            double max_dist = 0;
            double min_dist = 100;
            for (int i = 0; i < dMatches.length; i++) {
              double dist = dMatches[i].distance;
              if (min_dist > dist) min_dist = dist;
              if (max_dist < dist) max_dist = dist;
            }
            // Only distances ≤ threshold are good matches
            double threshold = max_dist * THRESHOLD_FACTOR;
            //                    double threshold = min_dist * 2;
            LinkedList<DMatch> goodMatches = new LinkedList<DMatch>();

            for (int i = 0; i < dMatches.length; i++) {
              if (dMatches[i].distance <= threshold) {
                goodMatches.addLast(dMatches[i]);
              }
            }

            // Get the ratio of good_matches to all_matches
            double ratio = (double) goodMatches.size() / (double) dMatches.length;

            System.out.println("*** current_record_filename:" + filename + " ***");
            System.out.println("feature:" + descriptor + "\nquery_feature:" + query_features);
            System.out.println(
                "min_dist of keypoints:" + min_dist + "  max_dist of keypoints:" + max_dist);
            System.out.println(
                "total_matches:" + dMatches.length + "\tgood_matches:" + goodMatches.size());
            //                    System.out.println("type:" + descriptor.type() + " channels:" +
            // descriptor.channels() + " rows:" + descriptor.rows() + " cols:" + descriptor.cols());
            //                    System.out.println("qtype:" + query_features.type() + "
            // qchannels:" + query_features.channels() + " qrows:" + query_features.rows() + "
            // qcols:" + query_features.cols());
            System.out.println();

            if (ratio > PERCENTAGE_THRESHOLD) {
              // Key:1        Value:filename|ratio
              context.write(ONE, new Text(filename + "|" + ratio));
              //                        context.write(ONE, new Text(filename + "|" +
              // String.valueOf(goodMatches.size())));
            }
          } else {
            System.out.println("The size of the features are not equal");
          }
        } else {
          // a null pointer, do nothing
          System.out.println("A broken/null feature:" + filename);
          System.out.println();
        }
      }
    }