/**
   * Create a Cascading Flow that will parse a set of mbox files and emit a tab-separated text file
   * with fields for the msgId, author, email address, etc.
   *
   * <p>Note this Flow will only run locally, since we're using the cascading.utils LocalPlatform.
   *
   * @param options Settings for the flow
   * @return Flow suitable for execution
   * @throws Exception if creating the paths or taps, or connecting the flow, fails
   */
  public static Flow createFlow(ParseEmailArchivesOptions options) throws Exception {
    BasePlatform localPlatform = new LocalPlatform(ParseEmailArchivesWorkflow.class);

    // Source side: a text tap over the file list named in the options; each "line" field
    // is handed to the mbox splitter below.
    BasePath fileListPath = localPlatform.makePath(options.getFileList());
    Tap fileListTap = localPlatform.makeTap(localPlatform.makeTextScheme(), fileListPath);

    // Assembly: split each listed mbox into messages, then parse every message.
    Pipe splitMessages =
        new Each(new Pipe("emails"), new Fields("line"), new MboxSplitterFunction());
    Pipe parsedMessages = new Each(splitMessages, new ParseEmail());

    // Sink side: text-line output in the configured directory, replacing any previous contents.
    BasePath outputDirPath = localPlatform.makePath(options.getOutputDir());
    TextLineScheme outputScheme = new TextLineScheme(false);
    Tap resultTap = localPlatform.makeTap(outputScheme, outputDirPath, SinkMode.REPLACE);

    FlowConnector connector = localPlatform.makeFlowConnector();
    return connector.connect(fileListTap, resultTap, parsedMessages);
  }
  /**
   * Verifies that a tuple written through a binary-scheme tap into a fresh subdirectory of the
   * platform's temp dir can be read back with both of its fields intact.
   *
   * @throws Exception if the tap cannot be opened for writing or reading
   */
  @Test
  public void testTempPath() throws Exception {
    BasePlatform platform = new HadoopPlatform(HadoopPlatformTest.class);

    BasePath tempDir = platform.getTempDir();

    // Verify we can write and then read. The random child name keeps this run's data
    // separate from anything already present in the temp dir.
    BasePath testDir = platform.makePath(tempDir, UUID.randomUUID().toString());

    Scheme scheme = platform.makeBinaryScheme(new Fields("name", "age"));
    Tap tap = platform.makeTap(scheme, testDir);

    // Close the collector even if add() throws, so the underlying output is not leaked.
    TupleEntryCollector writer = tap.openForWrite(platform.makeFlowProcess());
    try {
      writer.add(new Tuple("ken", 37));
    } finally {
      writer.close();
    }

    // Close the iterator even when an assertion fails.
    TupleEntryIterator iter = tap.openForRead(platform.makeFlowProcess());
    try {
      assertTrue(iter.hasNext());
      TupleEntry te = iter.next();
      assertEquals("ken", te.getString("name"));
      assertEquals(37, te.getInteger("age"));
      assertFalse(iter.hasNext());
    } finally {
      iter.close();
    }
  }
  /**
   * Calls the {@code generateTerms} helper (presumably running the term-generation flow; confirm
   * against the helper), then checks that every tuple in the terms subdirectory can be loaded
   * into a {@link WikiTermDatum} and that the articles counter equals 15.
   *
   * @throws Exception if the helper fails or the output cannot be read
   */
  @Test
  public void test() throws Exception {
    GenerateTermsOptions options = generateTerms("build/test/GenerateTermsFlowTest/test");

    // Verify that we get expected results in the output
    BasePlatform platform = options.getPlatform(GenerateTermsFlowTest.class);
    Tap tap =
        platform.makeTap(
            platform.makeBinaryScheme(WikiTermDatum.FIELDS),
            options.getWorkingSubdirPath(WorkingConfig.TERMS_SUBDIR_NAME));

    // The iterator was previously never closed; release it even if reading a tuple throws.
    TupleEntryIterator iter = tap.openForRead(platform.makeFlowProcess());
    try {
      WikiTermDatum datum = new WikiTermDatum();
      while (iter.hasNext()) {
        datum.setTupleEntry(iter.next());
        // TODO verify that each field looks correct?
      }
    } finally {
      iter.close();
    }

    // Verify we got the expected number of results. Comparing boxed values means a missing
    // counter is reported as an assertion failure rather than a NullPointerException from
    // unboxing.
    Map<String, Long> counters = options.getCounters(GenerateTermsFlow.class);
    String counterName = WorkflowOptions.getFlowCounterName(WikiwordsCounters.ARTICLES);
    assertEquals(Long.valueOf(15), counters.get(counterName));
  }