public void testComplexNameWithRegex() throws Exception {
  OutputStream os = getFileSystem().create(new Path(getInputDir(), "text.txt"));
  Writer wr = new OutputStreamWriter(os);
  wr.write("b a\n");
  wr.close();

  JobConf conf = createJobConf();
  conf.setJobName("name \\Evalue]");

  conf.setInputFormat(TextInputFormat.class);

  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapperClass(IdentityMapper.class);

  FileInputFormat.setInputPaths(conf, getInputDir());
  FileOutputFormat.setOutputPath(conf, getOutputDir());

  JobClient.runJob(conf);

  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(getOutputDir(), new OutputLogFilter()));
  assertEquals(1, outputFiles.length);
  InputStream is = getFileSystem().open(outputFiles[0]);
  BufferedReader reader = new BufferedReader(new InputStreamReader(is));
  assertEquals("0\tb a", reader.readLine());
  assertNull(reader.readLine());
  reader.close();
}
/**
 * Moves the path(s) matching the given source to the specified target path.
 *
 * @param source path (or glob) to move
 * @param target destination to move to
 * @param fs     Hadoop FileSystem
 */
public static void move(String source, String target, FileSystem fs) throws Exception {
  Path srcPath = new Path(source);
  Path[] srcs = FileUtil.stat2Paths(fs.globStatus(srcPath), srcPath);
  Path dst = new Path(target);
  if (srcs.length > 1 && !fs.getFileStatus(dst).isDir()) {
    throw new FileSystemException(
        "When moving multiple files, destination should be a directory.");
  }
  for (int i = 0; i < srcs.length; i++) {
    if (!fs.rename(srcs[i], dst)) {
      FileStatus srcFstatus = null;
      FileStatus dstFstatus = null;
      try {
        srcFstatus = fs.getFileStatus(srcs[i]);
      } catch (FileNotFoundException e) {
        throw new FileNotFoundException(srcs[i] + ": No such file or directory");
      }
      try {
        dstFstatus = fs.getFileStatus(dst);
      } catch (IOException e) {
        // Nothing
      }
      if ((srcFstatus != null) && (dstFstatus != null)) {
        if (srcFstatus.isDir() && !dstFstatus.isDir()) {
          throw new FileSystemException(
              "cannot overwrite non directory " + dst + " with directory " + srcs[i]);
        }
      }
      throw new FileSystemException("Failed to rename " + srcs[i] + " to " + dst);
    }
  }
}
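A minimal sketch of calling the move helper above; the glob and directory names are illustrative assumptions, and it uses the default FileSystem from the Configuration on the classpath:

public static void main(String[] args) throws Exception {
  // Illustrative call site for move(); the paths below are assumptions, not values from the source.
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  // Move every file matching the glob into an existing target directory.
  move("/data/incoming/*.txt", "/data/archive", fs);
}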
/**
 * Deletes every path matching the given glob pattern.
 *
 * @return true if at least one path matched and a delete was attempted
 */
public static boolean globDelete(final FileSystem fs, final String path, final boolean recursive)
    throws IOException {
  boolean deleted = false;
  for (final Path p : FileUtil.stat2Paths(fs.globStatus(new Path(path)))) {
    fs.delete(p, recursive);
    deleted = true;
  }
  return deleted;
}
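A hedged usage sketch for globDelete; the glob pattern below is made up for illustration:

public static void main(String[] args) throws IOException {
  // Illustrative call site for globDelete(); the pattern is an assumption.
  FileSystem fs = FileSystem.get(new Configuration());
  // Recursively delete every path matching the glob; false means nothing matched.
  boolean removed = globDelete(fs, "/tmp/job-output-*", true);
  System.out.println("Anything deleted: " + removed);
}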
@Test(timeout = 30000)
public void testStat2Paths1() {
  assertNull(FileUtil.stat2Paths(null));

  FileStatus[] fileStatuses = new FileStatus[0];
  Path[] paths = FileUtil.stat2Paths(fileStatuses);
  assertEquals(0, paths.length);

  Path path1 = new Path("file://foo");
  Path path2 = new Path("file://moo");
  fileStatuses = new FileStatus[] {
      new FileStatus(3, false, 0, 0, 0, path1),
      new FileStatus(3, false, 0, 0, 0, path2)
  };
  paths = FileUtil.stat2Paths(fileStatuses);
  assertEquals(2, paths.length);
  assertEquals(paths[0], path1);
  assertEquals(paths[1], path2);
}
/**
 * Returns the output directory of the final (highest-numbered) "job-N" subdirectory
 * under the given output path, or the output path itself if no job directories exist.
 */
public static Path getOutputsFinalJob(final FileSystem fs, final String output) throws IOException {
  int largest = -1;
  for (final Path path : FileUtil.stat2Paths(fs.listStatus(new Path(output)))) {
    final String[] name = path.getName().split(DASH);
    if (name.length == 2 && name[0].equals(Tokens.JOB)) {
      if (Integer.valueOf(name[1]) > largest) {
        largest = Integer.valueOf(name[1]);
      }
    }
  }
  if (largest == -1) {
    return new Path(output);
  } else {
    return new Path(output + "/" + Tokens.JOB + "-" + largest);
  }
}
/** Recursively visits every file reachable under the given URI. */
public static void fileTreeRecursion(URI uri, Configuration conf, FileSystem fs)
    throws IOException {
  Path current = new Path(uri);
  if (fs.isFile(current)) {
    visit(current, fs);
  } else {
    FileStatus[] status = fs.listStatus(current);
    Path[] paths = FileUtil.stat2Paths(status);
    for (Path p : paths) {
      fileTreeRecursion(p.toUri(), conf, fs);
    }
  }
}
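A possible entry point for the recursive walk above, assuming a visit(Path, FileSystem) callback is defined elsewhere in the same class; the namenode address and start directory are illustrative:

public static void main(String[] args) throws IOException {
  // Illustrative driver; the HDFS URI is an assumption, not taken from the source.
  Configuration conf = new Configuration();
  URI root = URI.create("hdfs://namenode:8020/user/hadoop/data");
  FileSystem fs = FileSystem.get(root, conf);
  fileTreeRecursion(root, conf, fs);
}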
/** Open the output generated by this format. */
public static MapFile.Reader[] getReaders(Path dir, Configuration conf) throws IOException {
  FileSystem fs = dir.getFileSystem(conf);
  Path[] names = FileUtil.stat2Paths(fs.listStatus(dir));

  // sort names, so that hash partitioning works
  Arrays.sort(names);

  MapFile.Reader[] parts = new MapFile.Reader[names.length];
  for (int i = 0; i < names.length; i++) {
    parts[i] = new MapFile.Reader(fs, names[i].toString(), conf);
  }
  return parts;
}
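A sketch of reading a single key back through the readers returned by getReaders; it assumes Text keys and values and that the output was partitioned with the default HashPartitioner, which is what the sort comment above relies on:

// Sketch only: Text key/value types and HashPartitioner are assumptions about how the
// MapFile output was written.
public static void lookup(Path dir, Configuration conf, String keyStr) throws IOException {
  MapFile.Reader[] readers = getReaders(dir, conf);
  Text key = new Text(keyStr);
  Text value = new Text();
  Partitioner<Text, Text> partitioner = new HashPartitioner<Text, Text>();
  // The sorted part files line up with the hash partitioning, so the key can only
  // live in this one reader.
  MapFile.Reader reader = readers[partitioner.getPartition(key, value, readers.length)];
  Writable result = reader.get(key, value);
  System.out.println(key + " => " + (result != null ? value : "not found"));
  for (MapFile.Reader r : readers) {
    r.close();
  }
}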
@Test(timeout = 30000)
public void testStat2Paths2() {
  Path defaultPath = new Path("file://default");
  Path[] paths = FileUtil.stat2Paths(null, defaultPath);
  assertEquals(1, paths.length);
  assertEquals(defaultPath, paths[0]);

  paths = FileUtil.stat2Paths(null, null);
  assertTrue(paths != null);
  assertEquals(1, paths.length);
  assertEquals(null, paths[0]);

  Path path1 = new Path("file://foo");
  Path path2 = new Path("file://moo");
  FileStatus[] fileStatuses = new FileStatus[] {
      new FileStatus(3, false, 0, 0, 0, path1),
      new FileStatus(3, false, 0, 0, 0, path2)
  };
  paths = FileUtil.stat2Paths(fileStatuses, defaultPath);
  assertEquals(2, paths.length);
  assertEquals(paths[0], path1);
  assertEquals(paths[1], path2);
}
public static void main(String[] args) throws Exception {
  String uri = args[0];
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(URI.create(uri), conf);

  Path[] paths = new Path[args.length];
  for (int i = 0; i < paths.length; i++) {
    paths[i] = new Path(args[i]);
  }

  FileStatus[] status = fs.listStatus(paths);
  Path[] listedPaths = FileUtil.stat2Paths(status);
  for (Path p : listedPaths) {
    System.out.println(p);
  }
}
public static void decompressPath(
    final FileSystem fs,
    final String in,
    final String out,
    final String compressedFileSuffix,
    final boolean deletePrevious) throws IOException {
  final Path inPath = new Path(in);
  if (fs.isFile(inPath)) {
    HDFSTools.decompressFile(fs, in, out, deletePrevious);
  } else {
    final Path outPath = new Path(out);
    if (!fs.exists(outPath)) {
      fs.mkdirs(outPath);
    }
    for (final Path path : FileUtil.stat2Paths(fs.globStatus(new Path(in + FOWARD_ASTERISK)))) {
      if (path.getName().endsWith(compressedFileSuffix)) {
        HDFSTools.decompressFile(
            fs,
            path.toString(),
            outPath.toString() + FOWARD_SLASH + path.getName().split("\\.")[0],
            deletePrevious);
      }
    }
  }
}
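A hypothetical call to decompressPath; the directory names and the ".gz" suffix are illustrative only:

public static void main(String[] args) throws IOException {
  // Illustrative call site; the paths and suffix are assumptions, not values from the source.
  FileSystem fs = FileSystem.get(new Configuration());
  decompressPath(fs, "/data/compressed", "/data/plain", ".gz", false);
}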
@Test
public void mrRun() throws Exception {
  FileSystem fs = dfsCluster.getFileSystem();
  Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
  fs.delete(inDir, true);
  String DATADIR = "/user/testing/testMapperReducer/data";
  Path dataDir = fs.makeQualified(new Path(DATADIR));
  fs.delete(dataDir, true);
  Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
  fs.delete(outDir, true);

  assertTrue(fs.mkdirs(inDir));
  Path INPATH = new Path(inDir, "input.txt");
  OutputStream os = fs.create(INPATH);
  Writer wr = new OutputStreamWriter(os, StandardCharsets.UTF_8);
  wr.write(DATADIR + "/" + inputAvroFile);
  wr.close();

  assertTrue(fs.mkdirs(dataDir));
  fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);

  JobConf jobConf = getJobConf();
  jobConf.set("jobclient.output.filter", "ALL");
  if (ENABLE_LOCAL_JOB_RUNNER) {
    // enable Hadoop LocalJobRunner; this makes it possible to run in a debugger
    // and set breakpoints
    jobConf.set("mapred.job.tracker", "local");
  }
  jobConf.setMaxMapAttempts(1);
  jobConf.setMaxReduceAttempts(1);
  jobConf.setJar(SEARCH_ARCHIVES_JAR);

  int shards = 2;
  int maxReducers = Integer.MAX_VALUE;
  if (ENABLE_LOCAL_JOB_RUNNER) {
    // local job runner has a couple of limitations: only one reducer is supported and the
    // DistributedCache doesn't work.
    // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
    maxReducers = 1;
    shards = 1;
  }

  String[] args = new String[] {
      "--morphline-file=" + tempDir + "/test-morphlines/solrCellDocumentTypes.conf",
      "--morphline-id=morphline1",
      "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
      "--output-dir=" + outDir.toString(),
      "--shards=" + shards,
      "--verbose",
      numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
      numRuns % 3 == 0
          ? "--reducers=" + shards
          : (numRuns % 3 == 1 ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers))
  };
  if (numRuns % 3 == 2) {
    args = concat(args, new String[] {"--fanout=2"});
  }
  if (numRuns == 0) {
    // force (slow) MapReduce based randomization to get coverage for that as well
    args = concat(
        new String[] {"-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1"},
        args);
  }

  MapReduceIndexerTool tool = createTool();
  int res = ToolRunner.run(jobConf, tool, args);
  assertEquals(0, res);
  Job job = tool.job;
  assertTrue(job.isComplete());
  assertTrue(job.isSuccessful());

  if (numRuns % 3 != 2) {
    // Only run this check if mtree merge is disabled.
    // With mtree merge enabled the BatchWriter counters aren't available anymore because
    // variable "job" now refers to the merge job rather than the indexing job
    assertEquals(
        "Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN,
        count,
        job.getCounters()
            .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString())
            .getValue());
  }

  // Check the output is as expected
  outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
  Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));
  System.out.println("outputfiles:" + Arrays.toString(outputFiles));

  UtilsForTests.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);

  // run again with --dryrun mode:
  tool = createTool();
  args = concat(args, new String[] {"--dry-run"});
  res = ToolRunner.run(jobConf, tool, args);
  assertEquals(0, res);

  numRuns++;
}
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  long minSizeNode = 0;
  long minSizeRack = 0;
  long maxSize = 0;
  Configuration conf = job.getConfiguration();

  // the values specified by setxxxSplitSize() take precedence over the
  // values that might have been specified in the config
  if (minSplitSizeNode != 0) {
    minSizeNode = minSplitSizeNode;
  } else {
    minSizeNode = conf.getLong("mapred.min.split.size.per.node", 0);
  }
  if (minSplitSizeRack != 0) {
    minSizeRack = minSplitSizeRack;
  } else {
    minSizeRack = conf.getLong("mapred.min.split.size.per.rack", 0);
  }
  if (maxSplitSize != 0) {
    maxSize = maxSplitSize;
  } else {
    maxSize = conf.getLong("mapred.max.split.size", 0);
  }
  if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
    throw new IOException(
        "Minimum split size per node " + minSizeNode
            + " cannot be larger than maximum split size " + maxSize);
  }
  if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
    throw new IOException(
        "Minimum split size per rack " + minSizeRack
            + " cannot be larger than maximum split size " + maxSize);
  }
  if (minSizeRack != 0 && minSizeNode > minSizeRack) {
    throw new IOException(
        "Minimum split size per node " + minSizeNode
            + " cannot be smaller than minimum split size per rack " + minSizeRack);
  }

  // all the files in input set
  Path[] paths = FileUtil.stat2Paths(listStatus(job).toArray(new FileStatus[0]));
  List<InputSplit> splits = new ArrayList<InputSplit>();
  if (paths.length == 0) {
    return splits;
  }

  // In one single iteration, process all the paths in a single pool.
  // Processing one pool at a time ensures that a split contains paths
  // from a single pool only.
  for (MultiPathFilter onepool : pools) {
    ArrayList<Path> myPaths = new ArrayList<Path>();

    // pick one input path. If it matches all the filters in a pool,
    // add it to the output set
    for (int i = 0; i < paths.length; i++) {
      if (paths[i] == null) { // already processed
        continue;
      }
      Path p = new Path(paths[i].toUri().getPath());
      if (onepool.accept(p)) {
        myPaths.add(paths[i]); // add it to my output set
        paths[i] = null;       // already processed
      }
    }
    // create splits for all files in this pool.
    getMoreSplits(
        conf, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack, splits);
  }

  // Finally, process all paths that do not belong to any pool.
  ArrayList<Path> myPaths = new ArrayList<Path>();
  for (int i = 0; i < paths.length; i++) {
    if (paths[i] == null) { // already processed
      continue;
    }
    myPaths.add(paths[i]);
  }
  // create splits for all files that are not in any pool.
  getMoreSplits(
      conf, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack, splits);

  // free up rackToNodes map
  rackToNodes.clear();
  return splits;
}
public void testSymLink() {
  try {
    boolean mayExit = false;
    MiniMRCluster mr = null;
    MiniDFSCluster dfs = null;
    try {
      Configuration conf = new Configuration();
      dfs = new MiniDFSCluster(conf, 1, true, null);
      FileSystem fileSys = dfs.getFileSystem();
      String namenode = fileSys.getUri().toString();
      mr = new MiniMRCluster(1, namenode, 3);
      // During tests, the default Configuration will use a local mapred
      // So don't specify -config or -cluster
      String strJobtracker = "mapred.job.tracker=" + "localhost:" + mr.getJobTrackerPort();
      String strNamenode = "fs.default.name=" + namenode;
      String argv[] = new String[] {
          "-input", INPUT_FILE,
          "-output", OUTPUT_DIR,
          "-mapper", map,
          "-reducer", reduce,
          // "-verbose",
          // "-jobconf", "stream.debug=set"
          "-jobconf", strNamenode,
          "-jobconf", strJobtracker,
          "-jobconf", "stream.tmpdir=" + System.getProperty("test.build.data", "/tmp"),
          "-jobconf",
          "mapred.child.java.opts=-Dcontrib.name=" + System.getProperty("contrib.name") + " "
              + "-Dbuild.test=" + System.getProperty("build.test") + " "
              + conf.get("mapred.child.java.opts", ""),
          "-cacheFile", fileSys.getUri() + CACHE_FILE + "#testlink"
      };

      fileSys.delete(new Path(OUTPUT_DIR), true);

      DataOutputStream file = fileSys.create(new Path(INPUT_FILE));
      file.writeBytes(mapString);
      file.close();
      file = fileSys.create(new Path(CACHE_FILE));
      file.writeBytes(cacheString);
      file.close();

      job = new StreamJob(argv, mayExit);
      job.go();

      fileSys = dfs.getFileSystem();
      String line = null;
      Path[] fileList = FileUtil.stat2Paths(
          fileSys.listStatus(new Path(OUTPUT_DIR), new OutputLogFilter()));
      for (int i = 0; i < fileList.length; i++) {
        System.out.println(fileList[i].toString());
        BufferedReader bread =
            new BufferedReader(new InputStreamReader(fileSys.open(fileList[i])));
        line = bread.readLine();
        System.out.println(line);
      }
      assertEquals(cacheString + "\t", line);
    } finally {
      if (dfs != null) {
        dfs.shutdown();
      }
      if (mr != null) {
        mr.shutdown();
      }
    }
  } catch (Exception e) {
    failTrace(e);
  }
}
public void doTestTextBatchAppend() throws Exception {
  LOG.debug("Starting...");

  final long rollCount = 10;
  final long batchSize = 2;
  final String fileName = "PageView";

  String newPath = testPath + "/singleTextBucket";
  int totalEvents = 0;
  int i = 1, j = 1;

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();
  context.put("hdfs.path", newPath);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.filePrefix", "pageview");

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  Date currentDate = new Date();
  Map<String, String> header = new HashMap<String, String>();
  header.put("topic", "PageView");

  List<String> bodies = Lists.newArrayList();

  // push the test events into the channel
  for (i = 1; i <= (rollCount * 10) / batchSize; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      header.put("timestamp", String.valueOf(currentDate.getTime()));
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2014, i, i, i, 0);
      String body = "Test." + i + "." + j;
      event.setHeaders(header);
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    sink.process();
  }
  sink.stop();

  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path[] fList = FileUtil.stat2Paths(dirStat);

  long expectedFiles = totalEvents / rollCount;
  if (totalEvents % rollCount > 0) {
    expectedFiles++;
  }
  Assert.assertEquals(
      "num files wrong, found: " + Lists.newArrayList(fList), expectedFiles, fList.length);

  // verify the content of every file that was written
  verifyOutputTextFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}