/** Test deleteOnExit */
public void testDeleteOnExit() throws IOException {
  Configuration conf = new Configuration();
  if (simulatedStorage) {
    conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true);
  }
  MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
  FileSystem fs = cluster.getFileSystem();
  FileSystem localfs = FileSystem.getLocal(conf);

  try {
    // Create files in HDFS and the local file system.
    Path file1 = new Path("filestatus.dat");
    Path file2 = new Path("filestatus2.dat");
    Path file3 = new Path("filestatus3.dat");
    FSDataOutputStream stm1 = createFile(fs, file1, 1);
    FSDataOutputStream stm2 = createFile(fs, file2, 1);
    FSDataOutputStream stm3 = createFile(localfs, file3, 1);
    System.out.println("DeleteOnExit: Created files.");

    // Write to the files and close them. Purposely, do not write to file2.
    writeFile(stm1);
    writeFile(stm3);
    stm1.close();
    stm2.close();
    stm3.close();

    // Set the delete-on-exit flag on the files.
    fs.deleteOnExit(file1);
    fs.deleteOnExit(file2);
    localfs.deleteOnExit(file3);

    // Close the file systems. This should make the above files disappear.
    fs.close();
    localfs.close();
    fs = null;
    localfs = null;

    // Reopen the file systems and verify that the files no longer exist.
    fs = cluster.getFileSystem();
    localfs = FileSystem.getLocal(conf);

    assertTrue(file1 + " still exists in spite of deleteOnExit set.", !fs.exists(file1));
    assertTrue(file2 + " still exists in spite of deleteOnExit set.", !fs.exists(file2));
    assertTrue(file3 + " still exists in spite of deleteOnExit set.", !localfs.exists(file3));
    System.out.println("DeleteOnExit successful.");
  } finally {
    IOUtils.closeStream(fs);
    IOUtils.closeStream(localfs);
    cluster.shutdown();
  }
}
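// The test above calls createFile and writeFile helpers from its enclosing test class that are
// not shown here. The sketch below is a minimal, hypothetical version of those helpers (the
// bodies, buffer size, and payload are assumptions, not the original implementation); it uses
// the same org.apache.hadoop.fs imports as the test above.
static FSDataOutputStream createFile(FileSystem fileSys, Path name, int repl) throws IOException {
  // Create the file with the requested replication factor and default buffer/block sizes.
  return fileSys.create(
      name,
      true,
      fileSys.getConf().getInt("io.file.buffer.size", 4096),
      (short) repl,
      fileSys.getDefaultBlockSize(name));
}

static void writeFile(FSDataOutputStream stm) throws IOException {
  // Write a small payload so the file has non-zero length before it is closed.
  byte[] buffer = new byte[1024];
  new java.util.Random().nextBytes(buffer);
  stm.write(buffer);
}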
/**
 * Get a tmp directory on the specified URI.
 *
 * @param scheme scheme of the target FS
 * @param authority authority of the target FS
 * @param mkdir create the directory if true
 * @param scratchDir path of the tmp directory
 * @return the qualified scratch directory for the target file system
 */
private String getScratchDir(String scheme, String authority, boolean mkdir, String scratchDir) {
  String fileSystem = scheme + ":" + authority;
  String dir = fsScratchDirs.get(fileSystem);
  if (dir == null) {
    Path dirPath = new Path(scheme, authority, scratchDir);
    if (mkdir) {
      try {
        FileSystem fs = dirPath.getFileSystem(conf);
        dirPath = new Path(fs.makeQualified(dirPath).toString());
        if (!fs.mkdirs(dirPath)) {
          throw new RuntimeException("Cannot make directory: " + dirPath.toString());
        }
        if (isHDFSCleanup) {
          fs.deleteOnExit(dirPath);
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
    dir = dirPath.toString();
    fsScratchDirs.put(fileSystem, dir);
  }
  return dir;
}
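// Hypothetical call sites for the helper above (it relies on the fsScratchDirs, conf, and
// isHDFSCleanup fields of its enclosing session-style class). The authority and paths below are
// made-up examples; the scheme:authority pair keys the cache, so a second call for the same file
// system returns the directory created by the first call without another mkdirs.
String hdfsScratch = getScratchDir("hdfs", "namenode:8020", true, "/tmp/scratch");
String cached = getScratchDir("hdfs", "namenode:8020", true, "/tmp/scratch"); // served from fsScratchDirs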
// Create a temp directory in HDFS to store logsearch logs before sorting.
public void tmpDirHDFS(
    boolean quiet, boolean silent, FileSystem fs, Configuration conf, String tmp, boolean log) {
  logConsole(quiet, silent, info, "Creating new Temp Directory in HDFS: " + tmp);

  try {
    Path path = new Path(tmp);
    if (!fs.exists(path)) {
      // Create the directory; if logs are not being kept, remove it when the FileSystem closes.
      fs.mkdirs(path);
      if (!log) {
        fs.deleteOnExit(path);
      }
    }
  } catch (IOException e) {
    if (e.toString().contains("Failed to find any Kerberos")) {
      logConsole(true, true, error, "No/bad Kerberos ticket - please authenticate.");
      System.exit(1);
    } else if (e.toString().contains("quota") && e.toString().contains("exceeded")) {
      logConsole(true, true, error, "Disk quota Exceeded.");
      System.exit(1);
    }
    e.printStackTrace();
    System.exit(1);
  }
}
// Delete a file or directory.
public void rmr(String folder) throws IOException {
  Path path = new Path(folder);
  FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
  // deleteOnExit only marks the path; the actual delete happens when fs.close() runs below.
  fs.deleteOnExit(path);
  log.debug("Delete: " + folder);
  fs.close();
}
public static void deleteHdfs(String hdfsFile) {
  try {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(hdfsFile), conf);
    fs.deleteOnExit(new Path(hdfsFile));
    fs.close();
  } catch (IOException e) {
    LOG.error("[deleteHdfs]", e);
  }
}
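// A minimal standalone sketch of the semantics the two helpers above rely on: deleteOnExit only
// registers the path, and the delete is carried out when the FileSystem is closed (or the JVM
// exits). The class name and path are illustrative assumptions; the local file system is used so
// the sketch runs without a cluster, but the behaviour is the same on HDFS.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteOnExitDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Path tmp = new Path("/tmp/delete-on-exit-demo");

    fs.mkdirs(tmp);
    fs.deleteOnExit(tmp);

    // Still present here: deleteOnExit only registers the path for later deletion.
    System.out.println("exists before close: " + fs.exists(tmp));

    // close() walks the registered paths and deletes them.
    fs.close();
  }
}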
/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
    throws IOException {
  Configuration conf = job.getConfiguration();
  // Create the partitions file.
  FileSystem fs = FileSystem.get(conf);
  Path partitionsPath = new Path(conf.get("hbase.fs.tmp.dir"), "partitions_" + UUID.randomUUID());
  partitionsPath = fs.makeQualified(partitionsPath);
  writePartitions(conf, partitionsPath, splitPoints);
  fs.deleteOnExit(partitionsPath);

  // Configure the job to use it.
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}
public static void main(String[] args) throws IOException {
  Path f = new Path(args[0]);
  System.out.println("javaaction test testjava3" + args[0]);
  Configuration conf = new Configuration();
  FileSystem hdfs = null;
  try {
    hdfs = FileSystem.get(conf);
    hdfs.deleteOnExit(f);
  } catch (IOException e) {
    e.printStackTrace();
    System.exit(1);
  } finally {
    if (null != hdfs) {
      hdfs.close();
    }
  }
}
/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
    throws IOException {
  Configuration conf = job.getConfiguration();
  // Create the partitions file.
  FileSystem fs = FileSystem.get(conf);
  String hbaseTmpFsDir =
      conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY, HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
  Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID());
  partitionsPath = fs.makeQualified(partitionsPath);
  writePartitions(conf, partitionsPath, splitPoints);
  fs.deleteOnExit(partitionsPath);

  // Configure the job to use it.
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}
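// The two configurePartitioner variants above call a writePartitions helper that is not shown.
// Below is a minimal sketch of what such a helper could look like (an assumption, not the
// original HBase implementation): it persists the split points as a SequenceFile that
// TotalOrderPartitioner later reads through the configured partition file. It assumes the usual
// java.util, org.apache.hadoop.io, and org.apache.hadoop.hbase.io imports.
private static void writePartitions(
    Configuration conf, Path partitionsPath, List<ImmutableBytesWritable> startKeys)
    throws IOException {
  if (startKeys.isEmpty()) {
    throw new IllegalArgumentException("No split points passed");
  }
  // Sort the split points so the partition file is in total order.
  TreeSet<ImmutableBytesWritable> sorted = new TreeSet<ImmutableBytesWritable>(startKeys);
  SequenceFile.Writer writer =
      SequenceFile.createWriter(
          conf,
          SequenceFile.Writer.file(partitionsPath),
          SequenceFile.Writer.keyClass(ImmutableBytesWritable.class),
          SequenceFile.Writer.valueClass(NullWritable.class));
  try {
    for (ImmutableBytesWritable startKey : sorted) {
      writer.append(startKey, NullWritable.get());
    }
  } finally {
    writer.close();
  }
}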
/**
 * Create a given path if it doesn't exist.
 *
 * @param conf the Hive configuration used to resolve the file system
 * @param path the path to create
 * @param permission permission string for the new directory
 * @param isLocal whether to create the path on the local file system
 * @param isCleanUp whether to delete the path when the file system is closed
 * @throws IOException
 */
private void createPath(
    HiveConf conf, Path path, String permission, boolean isLocal, boolean isCleanUp)
    throws IOException {
  FsPermission fsPermission = new FsPermission(permission);
  FileSystem fs;
  if (isLocal) {
    fs = FileSystem.getLocal(conf);
  } else {
    fs = path.getFileSystem(conf);
  }
  if (!fs.exists(path)) {
    fs.mkdirs(path, fsPermission);
    String dirType = isLocal ? "local" : "HDFS";
    LOG.info("Created " + dirType + " directory: " + path.toString());
  }
  if (isCleanUp) {
    fs.deleteOnExit(path);
  }
}
/** Testing {@link ResourceUsageMetrics} using {@link HadoopLogsAnalyzer}. */
@Test
@SuppressWarnings("deprecation")
public void testResourceUsageMetricsWithHadoopLogsAnalyzer() throws IOException {
  Configuration conf = new Configuration();

  // Get the input trace file.
  Path rootInputDir = new Path(System.getProperty("test.tools.input.dir", ""));
  Path rootInputSubFolder = new Path(rootInputDir, "rumen/small-trace-test");
  Path traceFile = new Path(rootInputSubFolder, "v20-resource-usage-log.gz");

  FileSystem lfs = FileSystem.getLocal(conf);

  // Define the root test directory.
  Path rootTempDir = new Path(System.getProperty("test.build.data", "/tmp"));

  // Define the output directory.
  Path outputDir = new Path(rootTempDir, "testResourceUsageMetricsWithHadoopLogsAnalyzer");
  lfs.delete(outputDir, true);
  lfs.deleteOnExit(outputDir);

  // Run HadoopLogsAnalyzer.
  HadoopLogsAnalyzer analyzer = new HadoopLogsAnalyzer();
  analyzer.setConf(conf);
  Path traceOutput = new Path(outputDir, "trace.json");
  analyzer.run(
      new String[] {"-write-job-trace", traceOutput.toString(), "-v1", traceFile.toString()});

  // Test HadoopLogsAnalyzer's output w.r.t. ResourceUsageMetrics:
  // get the logged job from the output trace file.
  JsonObjectMapperParser<LoggedJob> traceParser =
      new JsonObjectMapperParser<LoggedJob>(traceOutput, LoggedJob.class, conf);
  LoggedJob job = traceParser.getNext();
  LoggedTaskAttempt attempt = job.getMapTasks().get(0).getAttempts().get(0);
  ResourceUsageMetrics metrics = attempt.getResourceUsageMetrics();

  // Test via deepCompare().
  testResourceUsageMetricViaDeepCompare(metrics, 200, 100, 75, 50, true);
}
public BatchGroup mergeAndSpill(LinkedList<BatchGroup> batchGroups) throws SchemaChangeException {
  logger.debug("Copier allocator current allocation {}", copierAllocator.getAllocatedMemory());
  logger.debug(
      "mergeAndSpill: starting total size in memory = {}", oAllocator.getAllocatedMemory());
  VectorContainer outputContainer = new VectorContainer();
  List<BatchGroup> batchGroupList = Lists.newArrayList();
  int batchCount = batchGroups.size();
  for (int i = 0; i < batchCount / 2; i++) {
    if (batchGroups.size() == 0) {
      break;
    }
    BatchGroup batch = batchGroups.pollLast();
    assert batch != null : "Encountered a null batch during merge and spill operation";
    batchGroupList.add(batch);
  }
  if (batchGroupList.size() == 0) {
    return null;
  }

  int estimatedRecordSize = 0;
  for (VectorWrapper<?> w : batchGroupList.get(0)) {
    try {
      estimatedRecordSize += TypeHelper.getSize(w.getField().getType());
    } catch (UnsupportedOperationException e) {
      estimatedRecordSize += 50;
    }
  }
  int targetRecordCount = Math.max(1, COPIER_BATCH_MEM_LIMIT / estimatedRecordSize);
  VectorContainer hyperBatch = constructHyperBatch(batchGroupList);
  createCopier(hyperBatch, batchGroupList, outputContainer, true);

  int count = copier.next(targetRecordCount);
  assert count > 0;

  logger.debug(
      "mergeAndSpill: estimated record size = {}, target record count = {}",
      estimatedRecordSize,
      targetRecordCount);

  // 1 output container is kept in memory, so we want to hold on to it and transferClone
  // allows keeping ownership.
  VectorContainer c1 = VectorContainer.getTransferClone(outputContainer, oContext);
  c1.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  c1.setRecordCount(count);

  String spillDir = dirs.next();
  Path currSpillPath = new Path(Joiner.on("/").join(spillDir, fileName));
  currSpillDirs.add(currSpillPath);
  String outputFile = Joiner.on("/").join(currSpillPath, spillCount++);
  try {
    fs.deleteOnExit(currSpillPath);
  } catch (IOException e) {
    // Since this is meant to be used during a batch's spilling, we don't propagate the exception.
    logger.warn("Unable to mark spill directory " + currSpillPath + " for deleting on exit", e);
  }
  stats.setLongStat(Metric.SPILL_COUNT, spillCount);
  BatchGroup newGroup = new BatchGroup(c1, fs, outputFile, oContext);
  try (AutoCloseable a = AutoCloseables.all(batchGroupList)) {
    logger.info("Merging and spilling to {}", outputFile);
    while ((count = copier.next(targetRecordCount)) > 0) {
      outputContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
      outputContainer.setRecordCount(count);
      // Note that addBatch also clears the outputContainer.
      newGroup.addBatch(outputContainer);
    }
    injector.injectChecked(
        context.getExecutionControls(), INTERRUPTION_WHILE_SPILLING, IOException.class);
    newGroup.closeOutputStream();
  } catch (Throwable e) {
    // We only need to clean up newGroup if the spill failed.
    try {
      AutoCloseables.close(e, newGroup);
    } catch (Throwable t) {
      /* close() may hit the same IO issue; just ignore */
    }
    throw UserException.resourceError(e)
        .message("External Sort encountered an error while spilling to disk")
        .addContext(e.getMessage() /* more detail */)
        .build(logger);
  } finally {
    hyperBatch.clear();
  }
  logger.debug("mergeAndSpill: final total size in memory = {}", oAllocator.getAllocatedMemory());
  logger.info("Completed spilling to {}", outputFile);
  return newGroup;
}
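// The spill code above registers each spill directory with deleteOnExit so temporary data is
// removed when the FileSystem is closed. If a registered directory turns out to hold data that
// must survive (for example, spilled batches still being read), the registration can be
// withdrawn with cancelDeleteOnExit. A minimal sketch, assuming a FileSystem fs and a Path
// spillPath like currSpillPath above; the method name is made up:
boolean keepSpillData(FileSystem fs, Path spillPath) {
  // Returns true if the path had been registered and is now unregistered; after this call,
  // closing fs will no longer delete spillPath.
  return fs.cancelDeleteOnExit(spillPath);
}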
@Override
public int run(String[] args) throws Exception {
  addInputOption();
  addOutputOption();
  addOption(
      "cleanUp", "clean", "true if want to clean up intermediate files.", String.valueOf(true));
  addOption("startIndex", "start", "start index.", String.valueOf(0));
  Map<String, String> parsedArgs = parseArguments(args);
  if (parsedArgs == null) {
    return -1;
  }

  Path inputPath = getInputPath();
  Path recordsPath = getTempPath("records");
  Path summaryPath = getTempPath("summary");
  FileSystem fs = FileSystem.get(getConf());

  // 1. Count how many records (lines) are in each partition.
  // 2. Store the lines of each partition in temp files.
  // Step 2 is necessary because Hadoop may assign input to different partition ids on each run.
  Job countJob =
      prepareJob(
          inputPath,
          summaryPath,
          TextInputFormat.class,
          CountPartitionRecordNumMapper.class,
          IntWritable.class,
          LongWritable.class,
          CountPartitionRecordNumReducer.class,
          IntWritable.class,
          LongWritable.class,
          SequenceFileOutputFormat.class);
  countJob.getConfiguration().set(RECORDS_PATH, recordsPath.toString());
  countJob.setCombinerClass(CountPartitionRecordNumReducer.class);
  countJob.waitForCompletion(true);

  Job generateJob =
      prepareJob(
          recordsPath,
          getOutputPath(),
          SequenceFileInputFormat.class,
          AssignRecordIdMapper.class,
          NullWritable.class,
          Text.class,
          TextOutputFormat.class);
  generateJob.getConfiguration().set(SUMMARY_PATH, summaryPath.toString());
  generateJob
      .getConfiguration()
      .setLong(
          START_INDEX,
          getOption("startIndex") == null ? 0 : Long.parseLong(getOption("startIndex")));
  generateJob.waitForCompletion(true);

  // Clean up intermediate files if requested.
  if (getOption("cleanUp").equals("true")) {
    if (fs.exists(recordsPath)) {
      fs.delete(recordsPath, true);
    }
    if (fs.exists(summaryPath)) {
      fs.delete(summaryPath, true);
    }
    fs.deleteOnExit(getTempPath());
  }

  // Record how many ids have been created.
  totalIdCount =
      generateJob
          .getCounters()
          .findCounter(SequentialIdGeneratorJob.COUNT.TOTAL_ID_COUNT)
          .getValue();
  return 0;
}
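// Hypothetical driver invocation for the job above, assuming the enclosing class is
// SequentialIdGeneratorJob (it is referenced for the counter) and that it implements Tool, as
// AbstractJob-style jobs do. The paths are made up; the long flag names mirror the option keys
// registered in run().
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class SequentialIdGeneratorDriver {
  public static void main(String[] args) throws Exception {
    int exitCode =
        ToolRunner.run(
            new Configuration(),
            new SequentialIdGeneratorJob(),
            new String[] {
              "--input", "/data/lines",
              "--output", "/data/lines-with-ids",
              "--cleanUp", "true",
              "--startIndex", "0"
            });
    System.exit(exitCode);
  }
}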