Beispiel #1
0
 /**
  * Asserts that every expected line in {@code bodies} was written to the output files found
  * under {@code dir} whose file name starts with {@code prefix}.
  *
  * <p>Each matching file is read line by line; every line read is removed from {@code bodies}
  * (so the caller's list is mutated) and counted in {@code found}. The assertion passes when
  * no expected body is left over. Lines that are not in {@code bodies} still count towards
  * {@code found}, which is only used in the failure message.
  *
  * @param fs file system used to open the output files
  * @param conf not used by this method
  * @param dir directory handed to {@code getAllFiles} to enumerate candidate files
  * @param prefix only files whose simple name starts with this prefix are read
  * @param bodies expected lines; entries are removed as they are encountered
  * @throws IOException if an output file cannot be opened or read
  */
 private void verifyOutputTextFiles(
     FileSystem fs, Configuration conf, String dir, String prefix, List<String> bodies)
     throws IOException {
   int found = 0;
   int expected = bodies.size();
   for (String outputFile : getAllFiles(dir)) {
     String name = (new File(outputFile)).getName();
     if (!name.startsWith(prefix)) {
       continue;
     }
     // try-with-resources guarantees the stream is released even when readLine() throws;
     // previously the reader was closed only on the success path.
     try (FSDataInputStream input = fs.open(new Path(outputFile));
         BufferedReader reader = new BufferedReader(new InputStreamReader(input))) {
       String body;
       while ((body = reader.readLine()) != null) {
         bodies.remove(body);
         found++;
       }
     }
   }
   Assert.assertTrue(
       "Found = "
           + found
           + ", Expected = "
           + expected
           + ", Left = "
           + bodies.size()
           + " "
           + bodies,
       bodies.isEmpty());
 }
Beispiel #2
0
    /**
     * Loads the three query keywords from the first line of {@code /user/yao/query/query} into
     * {@code k0}, {@code k1} and {@code k2}.
     *
     * <p>The first line is split on commas; the first three tokens are used and any further
     * tokens are ignored.
     *
     * @param context task context, forwarded to {@code super.setup}
     * @throws IOException if the query file cannot be read, is empty, or contains fewer than
     *     three comma-separated keywords
     * @throws InterruptedException propagated from {@code super.setup}
     */
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);

      Path pt = new Path("/user/yao/query/query");
      // NOTE(review): a fresh Configuration is used here rather than context.getConfiguration();
      // kept as-is to preserve which file system gets resolved - confirm this is intended.
      FileSystem fs = FileSystem.get(new Configuration());

      String line;
      // try-with-resources closes the reader (and the wrapped stream) even if readLine() fails.
      try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(pt)))) {
        line = br.readLine();
      }
      if (line == null) {
        throw new IOException("Query file " + pt + " is empty");
      }

      String[] keywords = line.split(",");
      if (keywords.length < 3) {
        throw new IOException(
            "Query file "
                + pt
                + " must contain at least 3 comma-separated keywords but has "
                + keywords.length);
      }
      k0 = keywords[0];
      k1 = keywords[1];
      k2 = keywords[2];
    }
Beispiel #3
0
 /**
  * Opens {@code path} and positions the underlying stream at {@code start}, exposing the
  * byte range {@code [start, start + length)} of that file.
  *
  * @param fs file system used to open {@code path}
  * @param path file to open
  * @param start offset in {@code path} at which this stream begins
  * @param length number of bytes that belong to this stream; must not be negative
  * @param bufferSize buffer size passed to {@code fs.open}
  * @throws IllegalArgumentException if {@code length} is negative
  * @throws IOException if the file cannot be opened or the initial seek fails
  */
 HarFsInputStream(FileSystem fs, Path path, long start, long length, int bufferSize)
     throws IOException {
   if (length < 0) {
     throw new IllegalArgumentException("Negative length [" + length + "]");
   }
   underLyingStream = fs.open(path, bufferSize);
   try {
     underLyingStream.seek(start);
   } catch (IOException | RuntimeException e) {
     // The constructor is about to fail, so no caller will ever hold a reference that could
     // close the stream; release it here instead of leaking it.
     try {
       underLyingStream.close();
     } catch (IOException closeFailure) {
       e.addSuppressed(closeFailure);
     }
     throw e;
   }
   // the start of this file in the part file
   this.start = start;
   // the position pointer in the part file
   this.position = start;
   // the end pointer in the part file
   this.end = start + length;
 }
    /**
     * Scores one database image feature against the query image feature and emits the record
     * when enough of its matches are "good".
     *
     * <p>Steps:
     *
     * <ol>
     *   <li>Records with an empty key or value are skipped silently; records whose JSON cannot
     *       be turned into a {@code Mat} (i.e. {@code json2mat} returns {@code null}) are
     *       logged and skipped.
     *   <li>The query feature JSON is read from the file named by the {@code featureFilePath}
     *       configuration entry and converted with {@code json2mat}.
     *   <li>If both features have the same number of columns they are matched with a
     *       FLANN-based {@code DescriptorMatcher}. A match is "good" when its distance is at
     *       most {@code max_dist * THRESHOLD_FACTOR}.
     *   <li>When the ratio of good matches to all matches exceeds {@code PERCENTAGE_THRESHOLD},
     *       {@code (ONE, "filename|ratio")} is written to the context.
     * </ol>
     *
     * @param key file name of the database image
     * @param value JSON-encoded feature of the database image
     * @param context task context used for configuration lookup and output
     * @throws IOException if the query feature file cannot be read or parsed, or writing fails
     * @throws InterruptedException propagated from {@code context.write}
     */
    public void map(Text key, Text value, Context context)
        throws InterruptedException, IOException {

      String filename = key.toString();
      String json = value.toString();

      // Make sure the input is valid
      if (filename.isEmpty() || json.isEmpty()) {
        return;
      }

      // Change the json-type feature to Mat-type feature
      Mat descriptor = json2mat(json);
      if (descriptor == null) {
        // a null pointer, do nothing
        System.out.println("A broken/null feature:" + filename);
        System.out.println();
        return;
      }

      // NOTE(review): the query feature file is re-read for every input record; caching the
      // parsed Mat once per task (e.g. in setup) would avoid that, but needs a new field.
      Mat query_features = json2mat(readQueryFeatureJson(context));
      if (query_features == null) {
        // Previously this surfaced as a NullPointerException on query_features.cols().
        throw new IOException(
            "Query feature file could not be parsed: "
                + context.getConfiguration().get("featureFilePath"));
      }

      // Ensure the two features have same length of cols (the feature extracted are all 128
      // cols(at least in this case))
      if (query_features.cols() != descriptor.cols()) {
        System.out.println("The size of the features are not equal");
        return;
      }

      // Get the similarity of the current database image against the query image
      DescriptorMatcher matcher = DescriptorMatcher.create(DescriptorMatcher.FLANNBASED);
      MatOfDMatch matches = new MatOfDMatch();
      matcher.match(query_features, descriptor, matches);
      DMatch[] dMatches = matches.toArray();

      // Calculate the max/min distances. min_dist starts at the largest double so that it is
      // correct even when every distance is above 100 (the previous initial value).
      double max_dist = 0;
      double min_dist = Double.MAX_VALUE;
      for (DMatch match : dMatches) {
        double dist = match.distance;
        if (min_dist > dist) {
          min_dist = dist;
        }
        if (max_dist < dist) {
          max_dist = dist;
        }
      }

      // Only distances <= threshold are good matches; only their count is needed.
      double threshold = max_dist * THRESHOLD_FACTOR;
      int goodMatchCount = 0;
      for (DMatch match : dMatches) {
        if (match.distance <= threshold) {
          goodMatchCount++;
        }
      }

      // Get the ratio of good_matches to all_matches. With zero matches this is NaN, which
      // never exceeds the threshold below, so nothing is emitted.
      double ratio = (double) goodMatchCount / (double) dMatches.length;

      System.out.println("*** current_record_filename:" + filename + " ***");
      System.out.println("feature:" + descriptor + "\nquery_feature:" + query_features);
      System.out.println(
          "min_dist of keypoints:" + min_dist + "  max_dist of keypoints:" + max_dist);
      System.out.println("total_matches:" + dMatches.length + "\tgood_matches:" + goodMatchCount);
      System.out.println();

      if (ratio > PERCENTAGE_THRESHOLD) {
        // Key:1        Value:filename|ratio
        context.write(ONE, new Text(filename + "|" + ratio));
      }
    }

    /**
     * Reads the complete file named by the {@code featureFilePath} configuration entry and
     * returns its content decoded as UTF-8.
     *
     * <p>The stream is read until end-of-file rather than relying on {@code available()},
     * and the bytes are decoded in one go so that multi-byte characters spanning two reads
     * are not corrupted.
     */
    private String readQueryFeatureJson(Context context) throws IOException {
      String pathStr = context.getConfiguration().get("featureFilePath");
      FileSystem fs = FileSystem.get(context.getConfiguration());
      java.io.ByteArrayOutputStream content = new java.io.ByteArrayOutputStream();
      try (FSDataInputStream in = fs.open(new Path(pathStr))) {
        byte[] buf = new byte[BUF_SIZE];
        int read;
        while ((read = in.read(buf, 0, buf.length)) != -1) {
          content.write(buf, 0, read);
        }
      }
      return new String(content.toByteArray(), StandardCharsets.UTF_8);
    }
Beispiel #5
0
    /**
     * Populates {@code version}, {@code stores} and {@code archive} from the master index and
     * the archive index, and records both files' modification times.
     *
     * <p>Master index: the first line starts with the index version, which must not exceed
     * {@code HarFileSystem.VERSION}. Every following line holds four space-separated numbers:
     * a start hash, an end hash and two longs that are handed to {@code Store} (presumably the
     * begin and end offsets of that store inside the archive index - confirm against
     * {@code Store}).
     *
     * <p>Archive index: for every store, the lines between {@code begin} and {@code end} are
     * read; each line becomes a {@code HarStatus} keyed by the decoded first token of the line.
     *
     * @throws IOException if either index cannot be read or the version is too new
     */
    private void parseMetaData() throws IOException {
      // One Text instance is reused as the line buffer for both index files.
      Text lineBuf = new Text();
      FSDataInputStream masterIn = null;
      LineReader masterReader = null;

      try {
        masterIn = fs.open(masterIndexPath);
        FileStatus masterStat = fs.getFileStatus(masterIndexPath);
        masterIndexTimestamp = masterStat.getModificationTime();
        masterReader = new LineReader(masterIn, getConf());

        // the first line contains the version of the index file
        long consumed = masterReader.readLine(lineBuf);
        String[] versionFields = lineBuf.toString().split(" ");
        version = Integer.parseInt(versionFields[0]);
        // make it always backwards-compatible
        if (this.version > HarFileSystem.VERSION) {
          throw new IOException(
              "Invalid version " + this.version + " expected " + HarFileSystem.VERSION);
        }

        // each line contains a hashcode range and the index file name
        while (consumed < masterStat.getLen()) {
          consumed += masterReader.readLine(lineBuf);
          String[] fields = lineBuf.toString().split(" ");
          int startHash = Integer.parseInt(fields[0]);
          int endHash = Integer.parseInt(fields[1]);
          long first = Long.parseLong(fields[2]);
          long second = Long.parseLong(fields[3]);
          stores.add(new Store(first, second, startHash, endHash));
          lineBuf.clear();
        }
      } catch (IOException ioe) {
        LOG.warn("Encountered exception ", ioe);
        throw ioe;
      } finally {
        IOUtils.cleanup(LOG, masterReader, masterIn);
      }

      FSDataInputStream archiveIn = fs.open(archiveIndexPath);
      try {
        FileStatus archiveStat = fs.getFileStatus(archiveIndexPath);
        archiveIndexTimestamp = archiveStat.getModificationTime();

        // now start reading the real index file
        for (Store store : stores) {
          // A fresh reader is created after every seek on the shared stream.
          archiveIn.seek(store.begin);
          LineReader storeReader = new LineReader(archiveIn, getConf());
          long consumed = 0;
          while (store.begin + consumed < store.end) {
            consumed += storeReader.readLine(lineBuf);
            String entry = lineBuf.toString();
            String decodedName = decodeFileName(entry.split(" ")[0]);
            archive.put(new Path(decodedName), new HarStatus(entry));
            lineBuf.clear();
          }
        }
      } finally {
        IOUtils.cleanup(LOG, archiveIn);
      }
    }