コード例 #1
0
  /**
   * Saves the lines of the given RDD as text under {@code fname}.
   *
   * <p>When {@code inSingleFile} is {@code true}, the RDD is first written to a temporary
   * location named {@code fname_<random>_<random>} (chosen so that it does not already exist),
   * the temporary output is merged into {@code fname} via
   * {@code MapReduceTool.mergeIntoSingleFile}, and the temporary location is deleted afterwards,
   * whether or not saving and merging succeeded. When {@code inSingleFile} is {@code false},
   * the RDD is written directly with {@code rdd.saveAsTextFile(fname)}.
   *
   * @param rdd the lines to write
   * @param fname the output path
   * @param inSingleFile whether the output should be merged into a single file at {@code fname}
   * @throws DMLRuntimeException if an {@link IOException} occurs while checking for, merging, or
   *     deleting the temporary output; the original exception is attached as the cause
   */
  private void customSaveTextFile(JavaRDD<String> rdd, String fname, boolean inSingleFile)
      throws DMLRuntimeException {
    if (!inSingleFile) {
      rdd.saveAsTextFile(fname);
      return;
    }

    Random rand = new Random();
    String randFName = fname + "_" + rand.nextLong() + "_" + rand.nextLong();

    // Remembers the exception leaving the try block (if any) so that a cleanup failure in the
    // finally block is attached to it instead of replacing it.
    Throwable primaryFailure = null;
    try {
      // Pick a temporary name that is not already in use.
      while (MapReduceTool.existsFileOnHDFS(randFName)) {
        randFName = fname + "_" + rand.nextLong() + "_" + rand.nextLong();
      }

      // Write the partitions as-is and merge them afterwards. The original author noted this as
      // the faster alternative to rdd.coalesce(1, true) followed by copying part-00000.
      rdd.saveAsTextFile(randFName);
      MapReduceTool.mergeIntoSingleFile(randFName, fname);
    } catch (IOException e) {
      DMLRuntimeException wrapped =
          new DMLRuntimeException(
              "Cannot merge the output into single file: " + e.getMessage(), e);
      primaryFailure = wrapped;
      throw wrapped;
    } catch (RuntimeException e) {
      // Rethrown unchanged; only recorded so the cleanup below cannot mask it.
      primaryFailure = e;
      throw e;
    } finally {
      try {
        // Make sure no randomly named temporary output is left behind.
        MapReduceTool.deleteFileIfExistOnHDFS(randFName);
      } catch (IOException e) {
        if (primaryFailure != null) {
          // Keep the original failure as the one reported to the caller.
          primaryFailure.addSuppressed(e);
        } else {
          throw new DMLRuntimeException(
              "Cannot delete temporary output " + randFName + ": " + e.getMessage(), e);
        }
      }
    }
  }