コード例 #1
0
ファイル: FileUtil.java プロジェクト: luochen01/tongji_thesis
 /** Copy FileSystem files to local files. */
 public static boolean copy(
     FileSystem srcFS, Path src, File dst, boolean deleteSource, Configuration conf)
     throws IOException {
   if (srcFS.getFileStatus(src).isDir()) {
     if (!dst.mkdirs()) {
       return false;
     }
     FileStatus contents[] = srcFS.listStatus(src);
     for (int i = 0; i < contents.length; i++) {
       copy(
           srcFS,
           contents[i].getPath(),
           new File(dst, contents[i].getPath().getName()),
           deleteSource,
           conf);
     }
   } else if (srcFS.isFile(src)) {
     InputStream in = srcFS.open(src);
     IOUtils.copyBytes(in, new FileOutputStream(dst), conf);
   } else {
     throw new IOException(src.toString() + ": No such file or directory");
   }
   if (deleteSource) {
     return srcFS.delete(src, true);
   } else {
     return true;
   }
 }
コード例 #2
0
ファイル: FileDataGenNew.java プロジェクト: hwx293926/HiBench
  private InputStream OpenMultiplePartsWithOffset(FileSystem fs, Path pt, long offset)
      throws IOException {
    RemoteIterator<LocatedFileStatus> rit = fs.listFiles(pt, false);
    Vector<FSDataInputStream> fileHandleList = new Vector<FSDataInputStream>();
    while (rit.hasNext()) {
      Path path = rit.next().getPath();
      String filename =
          path.toString().substring(path.getParent().toString().length(), path.toString().length());

      if (filename.startsWith("/part-")) {
        long filesize = fs.getFileStatus(path).getLen();
        if (offset < filesize) {
          FSDataInputStream handle = fs.open(path);
          if (offset > 0) {
            handle.seek(offset);
          }
          fileHandleList.add(handle);
        }
        offset -= filesize;
      }
    }
    if (fileHandleList.size() == 1) return fileHandleList.get(0);
    else if (fileHandleList.size() > 1) {
      Enumeration<FSDataInputStream> enu = fileHandleList.elements();
      return new SequenceInputStream(enu);
    } else {
      System.err.println("Error, no source file loaded. run genSeedDataset.sh fisrt!");
      return null;
    }
  }
コード例 #3
0
ファイル: HadoopJoin.java プロジェクト: yaol9/gbkws-hadoop
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);

      Path pt = new Path("/user/yao/query/query");
      FileSystem fs = FileSystem.get(new Configuration());
      BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(pt)));
      String line = br.readLine();
      String[] keywords = line.split(",");
      k0 = keywords[0];
      k1 = keywords[1];
      k2 = keywords[2];
      br.close();
    }
コード例 #4
0
 /**
  * Opens an FSDataInputStream at the indicated Path.
  *
  * @param f the file name to open
  * @param bufferSize the size of the buffer to be used.
  */
 @Override
 public FSDataInputStream open(Path f, int bufferSize) throws IOException {
   FileSystem fs;
   InputStream in;
   if (verifyChecksum) {
     fs = this;
     in = new ChecksumFSInputChecker(this, f, bufferSize);
   } else {
     fs = getRawFileSystem();
     in = fs.open(f, bufferSize);
   }
   return new FSDataBoundedInputStream(fs, f, in);
 }
コード例 #5
0
ファイル: FileUtil.java プロジェクト: luochen01/tongji_thesis
  /** Copy files between FileSystems. */
  public static boolean copy(
      FileSystem srcFS,
      Path src,
      FileSystem dstFS,
      Path dst,
      boolean deleteSource,
      boolean overwrite,
      Configuration conf)
      throws IOException {
    dst = checkDest(src.getName(), dstFS, dst, overwrite);

    if (srcFS.getFileStatus(src).isDir()) {
      checkDependencies(srcFS, src, dstFS, dst);
      if (!dstFS.mkdirs(dst)) {
        return false;
      }
      FileStatus contents[] = srcFS.listStatus(src);
      for (int i = 0; i < contents.length; i++) {
        copy(
            srcFS,
            contents[i].getPath(),
            dstFS,
            new Path(dst, contents[i].getPath().getName()),
            deleteSource,
            overwrite,
            conf);
      }
    } else if (srcFS.isFile(src)) {
      InputStream in = null;
      OutputStream out = null;
      try {
        in = srcFS.open(src);
        out = dstFS.create(dst, overwrite);
        IOUtils.copyBytes(in, out, conf, true);
      } catch (IOException e) {
        IOUtils.closeStream(out);
        IOUtils.closeStream(in);
        throw e;
      }
    } else {
      throw new IOException(src.toString() + ": No such file or directory");
    }
    if (deleteSource) {
      return srcFS.delete(src, true);
    } else {
      return true;
    }
  }
コード例 #6
0
ファイル: FileUtil.java プロジェクト: luochen01/tongji_thesis
  /** Copy all files in a directory to one output file (merge). */
  public static boolean copyMerge(
      FileSystem srcFS,
      Path srcDir,
      FileSystem dstFS,
      Path dstFile,
      boolean deleteSource,
      Configuration conf,
      String addString)
      throws IOException {
    dstFile = checkDest(srcDir.getName(), dstFS, dstFile, false);

    if (!srcFS.getFileStatus(srcDir).isDir()) return false;

    OutputStream out = dstFS.create(dstFile);

    try {
      FileStatus contents[] = srcFS.listStatus(srcDir);
      for (int i = 0; i < contents.length; i++) {
        if (!contents[i].isDir()) {
          InputStream in = srcFS.open(contents[i].getPath());
          try {
            IOUtils.copyBytes(in, out, conf, false);
            if (addString != null) out.write(addString.getBytes("UTF-8"));

          } finally {
            in.close();
          }
        }
      }
    } finally {
      out.close();
    }

    if (deleteSource) {
      return srcFS.delete(srcDir, true);
    } else {
      return true;
    }
  }
コード例 #7
0
ファイル: FileDataGenNew.java プロジェクト: hwx293926/HiBench
  public BufferedReader loadDataFromFile(String filepath, long offset) {
    try {
      Path pt = new Path(filepath);
      FileSystem fs = FileSystem.get(fsConf);
      InputStreamReader isr;
      if (fs.isDirectory(pt)) { // multiple parts
        isr = new InputStreamReader(OpenMultiplePartsWithOffset(fs, pt, offset));
      } else { // single file
        FSDataInputStream fileHandler = fs.open(pt);
        if (offset > 0) fileHandler.seek(offset);
        isr = new InputStreamReader(fileHandler);
      }

      BufferedReader reader = new BufferedReader(isr);
      if (offset > 0) reader.readLine(); // skip first line in case of seek
      return reader;
    } catch (FileNotFoundException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
    assert false : "Should not reach here!";
    return null;
  }
コード例 #8
0
    public void map(Text key, Text value, Context context)
        throws InterruptedException, IOException {

      String filename = key.toString();
      String json = value.toString();

      // Make sure the input is valid
      if (!(filename.isEmpty() || json.isEmpty())) {

        // Change the json-type feature to Mat-type feature
        Mat descriptor = json2mat(json);
        if (descriptor != null) {
          // Read the query feature from the cache in Hadoop
          Mat query_features;
          String pathStr = context.getConfiguration().get("featureFilePath");
          FileSystem fs = FileSystem.get(context.getConfiguration());
          FSDataInputStream fsDataInputStream = fs.open(new Path(pathStr));
          StringBuilder sb = new StringBuilder();

          // Use a buffer to read the query_feature
          int remain = fsDataInputStream.available();
          while (remain > 0) {
            int read;
            byte[] buf = new byte[BUF_SIZE];
            read = fsDataInputStream.read(buf, fsDataInputStream.available() - remain, BUF_SIZE);
            sb.append(new String(buf, 0, read, StandardCharsets.UTF_8));
            remain = remain - read;
            System.out.println("remain:" + remain + "\tread:" + read + "\tsb.size:" + sb.length());
          }

          // Read the query_feature line by line
          //                    Scanner sc = new Scanner(fsDataInputStream, "UTF-8");
          //                    StringBuilder sb = new StringBuilder();
          //                    while (sc.hasNextLine()) {
          //                        sb.append(sc.nextLine());
          //                    }
          //                    String query_json = sb.toString();
          //                    String query_json = new String(buf, StandardCharsets.UTF_8);

          String query_json = sb.toString();
          fsDataInputStream.close();
          query_features = json2mat(query_json);

          // Get the similarity of the current database image against the query image
          DescriptorMatcher matcher = DescriptorMatcher.create(DescriptorMatcher.FLANNBASED);
          MatOfDMatch matches = new MatOfDMatch();

          // Ensure the two features have same length of cols (the feature extracted are all 128
          // cols(at least in this case))
          if (query_features.cols() == descriptor.cols()) {

            matcher.match(query_features, descriptor, matches);
            DMatch[] dMatches = matches.toArray();

            // Calculate the max/min distances
            //                    double max_dist = Double.MAX_VALUE;
            //                    double min_dist = Double.MIN_VALUE;
            double max_dist = 0;
            double min_dist = 100;
            for (int i = 0; i < dMatches.length; i++) {
              double dist = dMatches[i].distance;
              if (min_dist > dist) min_dist = dist;
              if (max_dist < dist) max_dist = dist;
            }
            // Only distances ≤ threshold are good matches
            double threshold = max_dist * THRESHOLD_FACTOR;
            //                    double threshold = min_dist * 2;
            LinkedList<DMatch> goodMatches = new LinkedList<DMatch>();

            for (int i = 0; i < dMatches.length; i++) {
              if (dMatches[i].distance <= threshold) {
                goodMatches.addLast(dMatches[i]);
              }
            }

            // Get the ratio of good_matches to all_matches
            double ratio = (double) goodMatches.size() / (double) dMatches.length;

            System.out.println("*** current_record_filename:" + filename + " ***");
            System.out.println("feature:" + descriptor + "\nquery_feature:" + query_features);
            System.out.println(
                "min_dist of keypoints:" + min_dist + "  max_dist of keypoints:" + max_dist);
            System.out.println(
                "total_matches:" + dMatches.length + "\tgood_matches:" + goodMatches.size());
            //                    System.out.println("type:" + descriptor.type() + " channels:" +
            // descriptor.channels() + " rows:" + descriptor.rows() + " cols:" + descriptor.cols());
            //                    System.out.println("qtype:" + query_features.type() + "
            // qchannels:" + query_features.channels() + " qrows:" + query_features.rows() + "
            // qcols:" + query_features.cols());
            System.out.println();

            if (ratio > PERCENTAGE_THRESHOLD) {
              // Key:1        Value:filename|ratio
              context.write(ONE, new Text(filename + "|" + ratio));
              //                        context.write(ONE, new Text(filename + "|" +
              // String.valueOf(goodMatches.size())));
            }
          } else {
            System.out.println("The size of the features are not equal");
          }
        } else {
          // a null pointer, do nothing
          System.out.println("A broken/null feature:" + filename);
          System.out.println();
        }
      }
    }