Пример #1
1
    /**
     * Copies the HDFS path {@code hdfsPath} to a uniquely named temporary location on the local
     * file system and returns the local file.
     *
     * <p>The source is first copied with {@code FileUtil.copy}; afterwards a
     * {@code FileUtil.copyMerge} into the same destination is attempted. If that merge raises an
     * {@link IOException}, the copy destination itself is returned; otherwise the returned file is
     * the entry named after the last segment of {@code hdfsPath} inside the destination.
     *
     * @param hdfsPath HDFS path of the source; its last segment is used to name the local file
     * @param configuration not read by this method
     * @return the local file described above
     * @throws RuntimeCamelException wrapping any {@link IOException} raised while creating the
     *     temporary file, obtaining the file system or copying
     */
    private File getHfdsFileToTmpFile(String hdfsPath, HdfsConfiguration configuration) {
      try {
        // Last path segment without the separator. When the path contains no '/',
        // lastIndexOf returns -1 and the whole string is used instead of throwing.
        String fname = hdfsPath.substring(hdfsPath.lastIndexOf('/') + 1);

        // File.createTempFile rejects prefixes shorter than three characters,
        // so short file names are padded rather than allowed to fail.
        StringBuilder prefix = new StringBuilder(fname);
        while (prefix.length() < 3) {
          prefix.append('_');
        }

        File outputDest = File.createTempFile(prefix.toString(), ".hdfs");
        // Only the unique name is wanted; the placeholder file is removed (best effort) so the
        // copy below can create the destination itself.
        if (outputDest.exists()) {
          outputDest.delete();
        }

        HdfsInfo hdfsInfo = HdfsInfoFactory.newHdfsInfo(hdfsPath);
        FileSystem fileSystem = hdfsInfo.getFileSystem();
        FileUtil.copy(fileSystem, new Path(hdfsPath), outputDest, false, fileSystem.getConf());
        try {
          FileUtil.copyMerge(
              fileSystem, // src
              new Path(hdfsPath),
              FileSystem.getLocal(new Configuration()), // dest
              new Path(outputDest.toURI()),
              false,
              fileSystem.getConf(),
              null);
        } catch (IOException e) {
          // NOTE(review): presumably this is the plain-file case, where the destination already
          // exists after the copy and the merge refuses to overwrite it - confirm.
          return outputDest;
        }

        return new File(outputDest, fname);
      } catch (IOException ex) {
        throw new RuntimeCamelException(ex);
      }
    }
Пример #2
0
  /**
   * Runs the {@code WordCount} job over three short input lines, merges the job output into a
   * single file and checks the frequency reported for every term found in it.
   *
   * <p>NOTE(review): output lines that do not split into exactly two tokens are skipped, so an
   * empty merged result would pass - consider also asserting that all six terms were seen.
   *
   * @throws Exception if the job or any file-system call fails, or if the output contains a term
   *     outside 1..6
   */
  @Test
  public void testWordCountMR() throws Exception {
    // INIT
    Path inputPath = new Path(box.getInputPath(), "wordCount");
    Path resultPath = new Path("testMapReduceResult");

    String[] content = new String[] {"1 2 3\n", "2 3 4\n", "4 5 6 6\n"};
    DFSUtil.writeToFile(box.getFS(), inputPath, true, content);

    Job job =
        MRUtil.setUpJob(
            box.getConf(),
            WordCount.class,
            WordCount.Map.class,
            WordCount.Reduce.class,
            Text.class,
            IntWritable.class,
            box.getInputPath(),
            box.getOutputPath());

    // ACT & ASSERT
    boolean jobResult = job.waitForCompletion(true);

    assertTrue(jobResult);
    FileUtil.copyMerge(
        box.getFS(), box.getOutputPath(), box.getFS(), resultPath, false, box.getConf(), "\n");
    String result = DFSUtil.getFileContent(box.getFS(), resultPath);

    Pattern p = Pattern.compile("[\\s]+");
    for (String ln : result.split("\n")) {
      String[] splitty = p.split(ln.trim());
      if (splitty.length == 2) {
        // Integer.parseInt replaces the deprecated Integer(String) constructor; both throw
        // NumberFormatException on malformed input.
        int term = Integer.parseInt(splitty[0]);
        int freq = Integer.parseInt(splitty[1]);

        // Expected frequency per term, derived from the three input lines above.
        final int expectedFreq;
        switch (term) {
          case 1:
          case 5:
            expectedFreq = 1;
            break;
          case 2:
          case 3:
          case 4:
          case 6:
            expectedFreq = 2;
            break;
          default:
            throw new Exception("Unknown term " + term);
        }
        assertEquals(freq, expectedFreq);
      }
    }
  }
Пример #3
0
  /**
   * Delegates to FileUtil.copyMerge on the local file system. Both {@code src} and {@code dst}
   * are resolved against TEST_ROOT_DIR. The source is kept after the call and no separator
   * string is inserted between the merged files. The local file system is closed before this
   * method returns, whether or not the merge succeeds.
   *
   * @param src String non-null source path, relative to TEST_ROOT_DIR.
   * @param dst String non-null destination path, relative to TEST_ROOT_DIR.
   * @return boolean the value returned by FileUtil.copyMerge.
   * @throws IOException if an I/O error occurs.
   */
  private boolean copyMerge(String src, String dst) throws IOException {
    final Configuration localConf = new Configuration();
    final FileSystem localFs = FileSystem.getLocal(localConf);

    try {
      return FileUtil.copyMerge(
          localFs,
          new Path(TEST_ROOT_DIR, src),
          localFs,
          new Path(TEST_ROOT_DIR, dst),
          false, // do not delete the source
          localConf,
          null); // no string added between files
    } finally {
      localFs.close();
    }
  }
Пример #4
0
  /**
   * Merges all files under the given directory into a single file that replaces the directory
   * at the same path. If the path already refers to a file, nothing is done.
   *
   * <p>The merge is written to {@code path + "_temporary"} first. The source directory is only
   * deleted after the merge has reported success, and the merged file is never deleted when a
   * later step fails, so a failure does not lose the data.
   *
   * @param conf Hadoop Configuration
   * @param path HDFS Path
   * @throws java.io.IOException if the merge, the deletion of the source directory or the final
   *     rename fails
   */
  public static void merge(Configuration conf, String path) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);
    Path source = new Path(path);
    if (!fileSystem.getFileStatus(source).isDir()) {
      // Already a single file, so there is nothing left to merge.
      return;
    }

    // Merge every file under the source directory into a temporary file. The source is kept
    // (deleteSource = false) so that it is removed only once the merge is known to be good.
    Path target = new Path(path + "_temporary");
    if (!FileUtil.copyMerge(fileSystem, source, fileSystem, target, false, conf, null)) {
      throw new IOException("Failed to merge " + source + " into " + target);
    }

    // Remove the original source directory to make room for the merged file.
    if (!fileSystem.delete(source, true)) {
      throw new IOException(
          "Failed to delete " + source + "; merged data remains in " + target);
    }

    // Move the merged file to the original path. On failure the temporary file is deliberately
    // left in place because it is now the only copy of the data.
    if (!fileSystem.rename(target, source)) {
      throw new IOException(
          "Failed to rename " + target + " to " + source + "; merged data remains in " + target);
    }
  }
Пример #5
0
 /**
  * Registers every distributed-cache entry with the Spark context. An entry that is a plain
  * file is used as is; any other entry is first merged with FileUtil.copyMerge into a sibling
  * path named {@code "sparkreadable-" + name}, and that merged path is used instead. The cache
  * file list in the configuration is then replaced with the resulting URIs. Nothing happens
  * when no cache files are configured.
  *
  * @throws RuntimeException wrapping any IOException raised while reading or merging entries
  */
 private void distributeFiles() {
   try {
     URI[] cacheUris = DistributedCache.getCacheFiles(conf);
     if (cacheUris == null) {
       return;
     }

     URI[] readableUris = new URI[cacheUris.length];
     int slot = 0;
     for (URI cacheUri : cacheUris) {
       Path cachePath = new Path(cacheUri);
       FileSystem cacheFs = cachePath.getFileSystem(conf);

       URI readable = cacheUri;
       if (!cacheFs.isFile(cachePath)) {
         // Not a plain file: merge its contents into one file next to it.
         Path merged = new Path(cachePath.getParent(), "sparkreadable-" + cachePath.getName());
         FileUtil.copyMerge(cacheFs, cachePath, cacheFs, merged, false, conf, "");
         readable = merged.toUri();
       }

       readableUris[slot] = readable;
       sparkContext.addFile(readable.toString());
       slot++;
     }
     DistributedCache.setCacheFiles(readableUris, conf);
   } catch (IOException e) {
     throw new RuntimeException("Error retrieving cache files", e);
   }
 }