/**
 * Checks that {@code platform.rename} moves a directory: after the call the
 * destination path exists and the source path no longer does.
 */
@Test
public void testRename() throws Exception {
    final String targetDirname = "build/test/HadoopPlatformTest/testRename";
    BasePlatform platform = new HadoopPlatform(HadoopPlatformTest.class);

    // Remove anything left at the working path, then recreate it.
    BasePath workDir = platform.makePath(targetDirname);
    if (workDir.exists()) {
        workDir.delete(true);
    }
    workDir.mkdirs();

    // The source directory exists before the rename...
    BasePath source = platform.makePath(workDir, "src");
    source.mkdirs();
    assertTrue(source.exists());

    // ...and the destination does not.
    BasePath destination = platform.makePath(workDir, "dst");
    assertFalse(destination.exists());

    platform.rename(source, destination);

    assertTrue(destination.exists());
    assertFalse(source.exists());
}
@Test public void testPathCreation() throws Exception { // Clear it out first. final String targetDirname = "build/test/HadoopPlatformTest/testPathCreation"; File targetDirFile = new File(targetDirname); FileUtils.deleteDirectory(targetDirFile); assertFalse(targetDirFile.exists()); BasePlatform platform = new HadoopPlatform(HadoopPlatformTest.class); BasePath path = platform.makePath(targetDirname); assertEquals(targetDirname, path.getPath()); assertEquals(targetDirFile.toURI().toString(), path.getAbsolutePath()); assertEquals(targetDirFile.toURI().toString(), path.toString()); assertFalse(path.exists()); assertTrue(path.mkdirs()); assertTrue(path.isDirectory()); assertFalse(path.isFile()); assertTrue(targetDirFile.exists()); assertTrue(targetDirFile.isDirectory()); // Check out sub-dir support. File subDirFile = new File(targetDirFile, "subdir"); BasePath child = platform.makePath(path, "subdir"); assertEquals(targetDirname + "/" + "subdir", child.getPath()); assertEquals(subDirFile.toURI().toString(), child.getAbsolutePath()); assertFalse(child.exists()); assertTrue(child.mkdirs()); assertTrue(child.isDirectory()); assertFalse(child.isFile()); assertTrue(subDirFile.exists()); assertTrue(subDirFile.isDirectory()); }
@Test public void test() throws Exception { GenerateTermsOptions options = generateTerms("build/test/GenerateTermsFlowTest/test"); // Verify that we get expected results in the output BasePlatform platform = options.getPlatform(GenerateTermsFlowTest.class); Tap tap = platform.makeTap( platform.makeBinaryScheme(WikiTermDatum.FIELDS), options.getWorkingSubdirPath(WorkingConfig.TERMS_SUBDIR_NAME)); TupleEntryIterator iter = tap.openForRead(platform.makeFlowProcess()); WikiTermDatum datum = new WikiTermDatum(); while (iter.hasNext()) { datum.setTupleEntry(iter.next()); // TODO verify that each field looks correct? // System.out.println(datum.getTuple()); } // Verify we got the expected number of results. Map<String, Long> counters = options.getCounters(GenerateTermsFlow.class); String counterName = WorkflowOptions.getFlowCounterName(WikiwordsCounters.ARTICLES); assertEquals(15, (long) counters.get(counterName)); }
/** * Create a Cascading Flow that will parse a set of mbox files and emit a tab-separated text file * with fields for the msgId, author, email address, etc. * * <p>Note this Flow will only run locally, since we're using the cascading.utils LocalPlatform. * * @param options Settings for the flow * @return Flow suitable for execution * @throws Exception */ public static Flow createFlow(ParseEmailArchivesOptions options) throws Exception { BasePlatform platform = new LocalPlatform(ParseEmailArchivesWorkflow.class); // We'll read individual file paths from the input file. BasePath inputPath = platform.makePath(options.getFileList()); Tap sourceTap = platform.makeTap(platform.makeTextScheme(), inputPath); Pipe emailPipe = new Pipe("emails"); emailPipe = new Each(emailPipe, new Fields("line"), new MboxSplitterFunction()); emailPipe = new Each(emailPipe, new ParseEmail()); BasePath outputPath = platform.makePath(options.getOutputDir()); TextLineScheme scheme = new TextLineScheme(false); Tap sinkTap = platform.makeTap(scheme, outputPath, SinkMode.REPLACE); FlowConnector flowConnector = platform.makeFlowConnector(); Flow flow = flowConnector.connect(sourceTap, sinkTap, emailPipe); return flow; }
@Test public void testTempPath() throws Exception { BasePlatform platform = new HadoopPlatform(HadoopPlatformTest.class); BasePath tempDir = platform.getTempDir(); // Verify we can write and then read BasePath testDir = platform.makePath(tempDir, UUID.randomUUID().toString()); Scheme scheme = platform.makeBinaryScheme(new Fields("name", "age")); Tap tap = platform.makeTap(scheme, testDir); TupleEntryCollector writer = tap.openForWrite(platform.makeFlowProcess()); writer.add(new Tuple("ken", 37)); writer.close(); TupleEntryIterator iter = tap.openForRead(platform.makeFlowProcess()); assertTrue(iter.hasNext()); TupleEntry te = iter.next(); assertEquals("ken", te.getString("name")); assertFalse(iter.hasNext()); iter.close(); }