Code Example #1
  /** Test deleteOnExit */
  public void testDeleteOnExit() throws IOException {
    Configuration conf = new Configuration();
    if (simulatedStorage) {
      conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true);
    }
    MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
    FileSystem fs = cluster.getFileSystem();
    FileSystem localfs = FileSystem.getLocal(conf);

    try {

      // Creates files in HDFS and local file system.
      //
      Path file1 = new Path("filestatus.dat");
      Path file2 = new Path("filestatus2.dat");
      Path file3 = new Path("filestatus3.dat");
      FSDataOutputStream stm1 = createFile(fs, file1, 1);
      FSDataOutputStream stm2 = createFile(fs, file2, 1);
      FSDataOutputStream stm3 = createFile(localfs, file3, 1);
      System.out.println("DeleteOnExit: Created files.");

      // write to file1 and file3, then close all three streams. Nothing is written to file2.
      writeFile(stm1);
      writeFile(stm3);
      stm1.close();
      stm2.close();
      stm3.close();

      // set delete on exit flag on files.
      fs.deleteOnExit(file1);
      fs.deleteOnExit(file2);
      localfs.deleteOnExit(file3);

      // close the file system. This should make the above files
      // disappear.
      fs.close();
      localfs.close();
      fs = null;
      localfs = null;

      // reopen the file systems and verify that the files no longer exist.
      fs = cluster.getFileSystem();
      localfs = FileSystem.getLocal(conf);

      assertTrue(file1 + " still exists in spite of deleteOnExit being set.", !fs.exists(file1));
      assertTrue(file2 + " still exists in spite of deleteOnExit being set.", !fs.exists(file2));
      assertTrue(file3 + " still exists in spite of deleteOnExit being set.", !localfs.exists(file3));
      System.out.println("DeleteOnExit successful.");

    } finally {
      IOUtils.closeStream(fs);
      IOUtils.closeStream(localfs);
      cluster.shutdown();
    }
  }
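The test above exercises the core contract of FileSystem.deleteOnExit(): a marked path is not removed at call time, but when the FileSystem is closed (or the JVM shuts down). Below is a minimal, self-contained sketch of that behavior against the local file system; the class name and path are illustrative only, not taken from any of the projects listed here.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteOnExitSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem localFs = FileSystem.getLocal(conf);
    Path tmp = new Path("/tmp/delete-on-exit-demo.dat"); // illustrative path

    localFs.create(tmp).close();               // create an empty file
    localFs.deleteOnExit(tmp);                 // mark it for deferred deletion
    System.out.println(localFs.exists(tmp));   // true: still present

    localFs.close();                           // marked paths are deleted here
    FileSystem fresh = FileSystem.getLocal(conf);
    System.out.println(fresh.exists(tmp));     // false: gone after close()
    fresh.close();
  }
}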
Code Example #2
File: Context.java  Project: pensz/hive
  /**
   * Get a tmp directory on the specified URI
   *
   * @param scheme scheme of the target FS
   * @param authority authority of the target FS
   * @param mkdir create the directory if true
   * @param scratchDir path of the tmp directory
   */
  private String getScratchDir(String scheme, String authority, boolean mkdir, String scratchDir) {

    String fileSystem = scheme + ":" + authority;
    String dir = fsScratchDirs.get(fileSystem);

    if (dir == null) {
      Path dirPath = new Path(scheme, authority, scratchDir);
      if (mkdir) {
        try {
          FileSystem fs = dirPath.getFileSystem(conf);
          dirPath = new Path(fs.makeQualified(dirPath).toString());
          if (!fs.mkdirs(dirPath)) {
            throw new RuntimeException("Cannot make directory: " + dirPath.toString());
          }
          if (isHDFSCleanup) {
            fs.deleteOnExit(dirPath);
          }
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
      dir = dirPath.toString();
      fsScratchDirs.put(fileSystem, dir);
    }
    return dir;
  }
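A hypothetical call to the helper above, made from within the same class; the scheme, authority, and scratch-directory values are made up for illustration:

    // Resolve (and create, if needed) the scratch dir for one HDFS namenode.
    // Repeated calls with the same scheme/authority return the cached path.
    String dir = getScratchDir("hdfs", "namenode.example.com:8020", true, "/tmp/hive-scratch");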
Code Example #3
  // Create temp directory in HDFS to store logsearch logs before sorting
  public void tmpDirHDFS(
      boolean quiet, boolean silent, FileSystem fs, Configuration conf, String tmp, boolean log) {
    logConsole(quiet, silent, info, "Creating new Temp Directory in HDFS: " + tmp);

    try {
      Path path = new Path(tmp);
      if (!(fs.exists(path))) {
        // Create directory
        fs.mkdirs(path);
        if (!log) {
          fs.deleteOnExit(path);
        }
      }
    } catch (IOException e) {
      if (e.toString().contains("Failed to find any Kerberos")) {
        logConsole(true, true, error, "No/bad Kerberos ticket - please authenticate.");
        System.exit(1);
      } else if (e.toString().contains("quota") && e.toString().contains("exceeded")) {
        logConsole(true, true, error, "Disk quota Exceeded.");
        System.exit(1);
      }
      e.printStackTrace();
      System.exit(1);
    }
  }
Code Example #4
 // Delete a file or directory
 public void rmr(String folder) throws IOException {
   Path path = new Path(folder);
   FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
   fs.deleteOnExit(path);
   log.debug("Delete: " + folder);
   fs.close();
 }
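Note that deleteOnExit() above does not remove the path immediately; the deletion happens when fs.close() runs on the next line. A minimal sketch of the same helper using an immediate, recursive FileSystem.delete() instead, assuming the same hdfsPath, conf, and log fields as in Code Example #4:

 // Delete a file or directory right away instead of deferring to close()
 public void rmrNow(String folder) throws IOException {
   Path path = new Path(folder);
   FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
   try {
     fs.delete(path, true); // recursive delete, takes effect immediately
     log.debug("Delete: " + folder);
   } finally {
     fs.close();
   }
 }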
Code Example #5
File: HdfsOperater.java  Project: Jude7/BC-BSP
 public static void deleteHdfs(String hdfsFile) {
   try {
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(URI.create(hdfsFile), conf);
     fs.deleteOnExit(new Path(hdfsFile));
     fs.close();
   } catch (IOException e) {
     LOG.error("[deleteHdfs]", e);
   }
 }
Code Example #6
File: HFileOutputFormat2.java  Project: mringg/hbase
  /**
   * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
   * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
   */
  static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
      throws IOException {
    Configuration conf = job.getConfiguration();
    // create the partitions file
    FileSystem fs = FileSystem.get(conf);
    Path partitionsPath = new Path(conf.get("hbase.fs.tmp.dir"), "partitions_" + UUID.randomUUID());
    fs.makeQualified(partitionsPath);
    writePartitions(conf, partitionsPath, splitPoints);
    fs.deleteOnExit(partitionsPath);

    // configure job to use it
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
  }
Code Example #7
File: testjava3.java  Project: eboomyl/workflow
  public static void main(String[] args) throws IOException {
    Path f = new Path(args[0]);
    System.out.println("javaaction test testjava3" + args[0]);
    Configuration conf = new Configuration();
    FileSystem hdfs = null;
    try {
      hdfs = FileSystem.get(conf);
      hdfs.deleteOnExit(f);
    } catch (IOException e) {

      e.printStackTrace();
      System.exit(1);
    } finally {
      if (null != hdfs) hdfs.close();
    }
  }
Code Example #8
  /**
   * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
   * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
   */
  static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
      throws IOException {
    Configuration conf = job.getConfiguration();
    // create the partitions file
    FileSystem fs = FileSystem.get(conf);
    String hbaseTmpFsDir =
        conf.get(
            HConstants.TEMPORARY_FS_DIRECTORY_KEY, HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
    Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID());
    fs.makeQualified(partitionsPath);
    writePartitions(conf, partitionsPath, splitPoints);
    fs.deleteOnExit(partitionsPath);

    // configure job to use it
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
  }
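In both #6 and #8, FileSystem.makeQualified(Path) returns a new, fully qualified Path rather than modifying its argument, so its result is discarded as written. If the qualified path is wanted for the later calls, a sketch of the assignment form, assuming the same local variables as Code Example #8:

    Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID());
    partitionsPath = fs.makeQualified(partitionsPath); // keep the qualified copy
    writePartitions(conf, partitionsPath, splitPoints);
    fs.deleteOnExit(partitionsPath);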
Code Example #9
 /**
  * Create a given path if it doesn't exist.
  *
  * @param conf Hive configuration
  * @param path the path to create
  * @param permission permission string to apply to the created directory
  * @param isLocal whether the path is on the local file system
  * @param isCleanUp whether to mark the path for deletion on exit
  * @throws IOException
  */
 private void createPath(
     HiveConf conf, Path path, String permission, boolean isLocal, boolean isCleanUp)
     throws IOException {
   FsPermission fsPermission = new FsPermission(permission);
   FileSystem fs;
   if (isLocal) {
     fs = FileSystem.getLocal(conf);
   } else {
     fs = path.getFileSystem(conf);
   }
   if (!fs.exists(path)) {
     fs.mkdirs(path, fsPermission);
     String dirType = isLocal ? "local" : "HDFS";
     LOG.info("Created " + dirType + " directory: " + path.toString());
   }
   if (isCleanUp) {
     fs.deleteOnExit(path);
   }
 }
Code Example #10
  /** Testing {@link ResourceUsageMetrics} using {@link HadoopLogsAnalyzer}. */
  @Test
  @SuppressWarnings("deprecation")
  public void testResourceUsageMetricsWithHadoopLogsAnalyzer() throws IOException {
    Configuration conf = new Configuration();
    // get the input trace file
    Path rootInputDir = new Path(System.getProperty("test.tools.input.dir", ""));
    Path rootInputSubFolder = new Path(rootInputDir, "rumen/small-trace-test");
    Path traceFile = new Path(rootInputSubFolder, "v20-resource-usage-log.gz");

    FileSystem lfs = FileSystem.getLocal(conf);

    // define the root test directory
    Path rootTempDir = new Path(System.getProperty("test.build.data", "/tmp"));

    // define output directory
    Path outputDir = new Path(rootTempDir, "testResourceUsageMetricsWithHadoopLogsAnalyzer");
    lfs.delete(outputDir, true);
    lfs.deleteOnExit(outputDir);

    // run HadoopLogsAnalyzer
    HadoopLogsAnalyzer analyzer = new HadoopLogsAnalyzer();
    analyzer.setConf(conf);
    Path traceOutput = new Path(outputDir, "trace.json");
    analyzer.run(
        new String[] {
          "-write-job-trace", traceOutput.toString(),
          "-v1", traceFile.toString()
        });

    // test HadoopLogsAnalyzer's output w.r.t ResourceUsageMetrics
    //  get the logged job
    JsonObjectMapperParser<LoggedJob> traceParser =
        new JsonObjectMapperParser<LoggedJob>(traceOutput, LoggedJob.class, conf);

    //  get the logged job from the output trace file
    LoggedJob job = traceParser.getNext();
    LoggedTaskAttempt attempt = job.getMapTasks().get(0).getAttempts().get(0);
    ResourceUsageMetrics metrics = attempt.getResourceUsageMetrics();

    //  test via deepCompare()
    testResourceUsageMetricViaDeepCompare(metrics, 200, 100, 75, 50, true);
  }
Code Example #11
  public BatchGroup mergeAndSpill(LinkedList<BatchGroup> batchGroups) throws SchemaChangeException {
    logger.debug("Copier allocator current allocation {}", copierAllocator.getAllocatedMemory());
    logger.debug(
        "mergeAndSpill: starting total size in memory = {}", oAllocator.getAllocatedMemory());
    VectorContainer outputContainer = new VectorContainer();
    List<BatchGroup> batchGroupList = Lists.newArrayList();
    int batchCount = batchGroups.size();
    for (int i = 0; i < batchCount / 2; i++) {
      if (batchGroups.size() == 0) {
        break;
      }
      BatchGroup batch = batchGroups.pollLast();
      assert batch != null : "Encountered a null batch during merge and spill operation";
      batchGroupList.add(batch);
    }

    if (batchGroupList.size() == 0) {
      return null;
    }
    int estimatedRecordSize = 0;
    for (VectorWrapper<?> w : batchGroupList.get(0)) {
      try {
        estimatedRecordSize += TypeHelper.getSize(w.getField().getType());
      } catch (UnsupportedOperationException e) {
        estimatedRecordSize += 50;
      }
    }
    int targetRecordCount = Math.max(1, COPIER_BATCH_MEM_LIMIT / estimatedRecordSize);
    VectorContainer hyperBatch = constructHyperBatch(batchGroupList);
    createCopier(hyperBatch, batchGroupList, outputContainer, true);

    int count = copier.next(targetRecordCount);
    assert count > 0;

    logger.debug(
        "mergeAndSpill: estimated record size = {}, target record count = {}",
        estimatedRecordSize,
        targetRecordCount);

    // 1 output container is kept in memory, so we want to hold on to it and transferClone
    // allows keeping ownership
    VectorContainer c1 = VectorContainer.getTransferClone(outputContainer, oContext);
    c1.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    c1.setRecordCount(count);

    String spillDir = dirs.next();
    Path currSpillPath = new Path(Joiner.on("/").join(spillDir, fileName));
    currSpillDirs.add(currSpillPath);
    String outputFile = Joiner.on("/").join(currSpillPath, spillCount++);
    try {
      fs.deleteOnExit(currSpillPath);
    } catch (IOException e) {
      // since this is meant to be used in a batch's spilling, we don't propagate the exception
      logger.warn("Unable to mark spill directory " + currSpillPath + " for deleting on exit", e);
    }
    stats.setLongStat(Metric.SPILL_COUNT, spillCount);
    BatchGroup newGroup = new BatchGroup(c1, fs, outputFile, oContext);
    try (AutoCloseable a = AutoCloseables.all(batchGroupList)) {
      logger.info("Merging and spilling to {}", outputFile);
      while ((count = copier.next(targetRecordCount)) > 0) {
        outputContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
        outputContainer.setRecordCount(count);
        // note that addBatch also clears the outputContainer
        newGroup.addBatch(outputContainer);
      }
      injector.injectChecked(
          context.getExecutionControls(), INTERRUPTION_WHILE_SPILLING, IOException.class);
      newGroup.closeOutputStream();
    } catch (Throwable e) {
      // we only need to cleanup newGroup if spill failed
      try {
        AutoCloseables.close(e, newGroup);
      } catch (Throwable t) {
        /* close() may hit the same IO issue; just ignore */
      }
      throw UserException.resourceError(e)
          .message("External Sort encountered an error while spilling to disk")
          .addContext(e.getMessage() /* more detail */)
          .build(logger);
    } finally {
      hyperBatch.clear();
    }
    logger.debug("mergeAndSpill: final total size in memory = {}", oAllocator.getAllocatedMemory());
    logger.info("Completed spilling to {}", outputFile);
    return newGroup;
  }

  @Override
  public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(
        "cleanUp", "clean", "true if want to clean up intermediate files.", String.valueOf(true));
    addOption("startIndex", "start", "start index.", String.valueOf(0));

    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
      return -1;
    }

    Path inputPath = getInputPath();
    Path recordsPath = getTempPath("records");
    Path summaryPath = getTempPath("summary");
    FileSystem fs = FileSystem.get(getConf());

    //  1. count how many records (lines) are in each partition.
    //  2. store the lines of each partition into temp files.
    //  step 2 is necessary because Hadoop may assign records to different partition ids on each run.
    //
    Job countJob =
        prepareJob(
            inputPath,
            summaryPath,
            TextInputFormat.class,
            CountPartitionRecordNumMapper.class,
            IntWritable.class,
            LongWritable.class,
            CountPartitionRecordNumReducer.class,
            IntWritable.class,
            LongWritable.class,
            SequenceFileOutputFormat.class);
    countJob.getConfiguration().set(RECORDS_PATH, recordsPath.toString());
    countJob.setCombinerClass(CountPartitionRecordNumReducer.class);
    countJob.waitForCompletion(true);

    Job generateJob =
        prepareJob(
            recordsPath,
            getOutputPath(),
            SequenceFileInputFormat.class,
            AssignRecordIdMapper.class,
            NullWritable.class,
            Text.class,
            TextOutputFormat.class);
    generateJob.getConfiguration().set(SUMMARY_PATH, summaryPath.toString());
    generateJob
        .getConfiguration()
        .setLong(
            START_INDEX,
            getOption("startIndex") == null ? 0 : Long.parseLong(getOption("startIndex")));
    generateJob.waitForCompletion(true);
    // clean up
    if (getOption("cleanUp").equals("true")) {
      if (fs.exists(recordsPath)) {
        fs.delete(recordsPath, true);
      }
      if (fs.exists(summaryPath)) {
        fs.delete(summaryPath, true);
      }
      fs.deleteOnExit(getTempPath());
    }
    // record how many ids have been created
    totalIdCount =
        generateJob
            .getCounters()
            .findCounter(SequentialIdGeneratorJob.COUNT.TOTAL_ID_COUNT)
            .getValue();
    return 0;
  }