コード例 #1
0
ファイル: TapTest.java プロジェクト: LusciousPear/cascading
  /**
   * Runs a flow that splits each input line on tabs into {@code number}, {@code lower} and
   * {@code upper}, writes {@code upper} through a {@code TemplateTap} whose path template is
   * {@code "%s-%s"} over {@code number} and {@code lower}, and then reads back the {@code 1-a}
   * and {@code 2-b} sub-paths of the sink.
   *
   * @throws IOException if copying the input, running the flow, or reading the results fails
   */
  public void testTemplateTapView() throws IOException {
    if (!new File(inputFileJoined).exists()) {
      fail("data file not found");
    }

    copyFromLocal(inputFileJoined);

    Tap source = new Hfs(new TextLine(new Fields("line")), inputFileJoined);

    Pipe pipe = new Pipe("test");

    pipe = new Each(pipe, new RegexSplitter(new Fields("number", "lower", "upper"), "\t"));

    Tap sink =
        new Hfs(new SequenceFile(new Fields("upper")), outputPath + "/testtemplatesview", true);

    sink = new TemplateTap((Hfs) sink, "%s-%s", new Fields("number", "lower"), 1);

    Flow flow = new FlowConnector(getProperties()).connect(source, sink, pipe);

    flow.complete();

    Tap test = new Hfs(new SequenceFile(new Fields("upper")), sink.getPath().toString() + "/1-a");
    validateLength(flow.openTapForRead(test), 1, 1);

    test = new Hfs(new SequenceFile(new Fields("upper")), sink.getPath().toString() + "/2-b");
    validateLength(flow.openTapForRead(test), 1, 1);

    TupleEntryIterator input = flow.openTapForRead(test); // open 2-b

    // Close the iterator even when the assertion fails so the underlying reader is not leaked.
    try {
      assertEquals("wrong value", "B", input.next().get(0));
    } finally {
      input.close();
    }
  }
コード例 #2
0
ファイル: TapTest.java プロジェクト: LusciousPear/cascading
  /**
   * Runs a flow whose sink {@code TextLine} scheme declares the sink fields in the order
   * {@code second, first, third}, then checks that the first written line has the shape
   * lowercase letter, tab, digit, tab, uppercase letter.
   *
   * @throws IOException if copying the input, running the flow, or reading the sink fails
   */
  public void testSinkDeclaredFields() throws IOException {
    if (!new File(inputFileCross).exists()) {
      fail("data file not found");
    }

    copyFromLocal(inputFileCross);

    Tap source = new Hfs(new TextLine(new Fields("line")), inputFileCross);

    Pipe pipe = new Pipe("test");

    pipe =
        new Each(
            pipe, new RegexSplitter(new Fields("first", "second", "third"), "\\s"), Fields.ALL);

    Tap sink =
        new Hfs(
            new TextLine(new Fields("line"), new Fields("second", "first", "third")),
            outputPath + "/declaredsinks",
            true);

    Flow flow = new FlowConnector(getProperties()).connect(source, sink, pipe);

    //    flow.writeDOT( "declaredsinks.dot" );

    flow.complete();

    validateLength(flow, 37, null);

    TupleEntryIterator iterator = flow.openSink();

    // Close the iterator even when the assertion fails so the sink reader is not leaked.
    try {
      String line = iterator.next().getString(0);
      assertTrue("not equal: wrong values", line.matches("[a-z]\t[0-9]\t[A-Z]"));
    } finally {
      iterator.close();
    }
  }
コード例 #3
0
  /**
   * Writes two {@code Example.Person} messages to {@code /tmp/input}, runs a flow that applies
   * {@code ExpandProto} to the {@code value} field, and compares the string form of the two
   * tuples read back from the {@code /tmp/output} text sink with the expected values.
   *
   * @throws Exception if file system access or the flow fails
   */
  public void testInFlow() throws Exception {
    // Start from a clean slate: remove whatever a previous run left behind.
    FileSystem fs = FileSystem.get(new Configuration());
    fs.delete(new Path("/tmp/input"), true);
    fs.delete(new Path("/tmp/output"), true);

    Hfs inTap = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/input");
    TupleEntryCollector collector = inTap.openForWrite(new HadoopFlowProcess());
    collector.add(new TupleEntry(new Fields("value"), new Tuple(BRYAN.build())));
    collector.add(new TupleEntry(new Fields("value"), new Tuple(LUCAS.build())));
    collector.close();

    Pipe inPipe = new Pipe("in");
    Pipe p =
        new Each(
            inPipe,
            new Fields("value"),
            new ExpandProto(Example.Person.class),
            new Fields("id", "name", "email", "position"));

    Hfs sink = new Hfs(new TextLine(), "/tmp/output");
    new HadoopFlowConnector().connect(inTap, sink, p).complete();

    List<Tuple> results = new ArrayList<Tuple>();
    TupleEntryIterator iter = sink.openForRead(new HadoopFlowProcess());
    // The original never closed this iterator; release it once the results are copied out.
    try {
      while (iter.hasNext()) {
        results.add(iter.next().getTupleCopy());
      }
    } finally {
      iter.close();
    }
    assertEquals(2, results.size());

    assertEquals(
        new Tuple(0, 1, "bryan", "*****@*****.**", Example.Person.Position.CEO.getNumber())
            .toString(),
        results.get(0).toString());
    assertEquals(new Tuple(25, 2, "lucas", null, null).toString(), results.get(1).toString());
  }
コード例 #4
0
ファイル: TapTest.java プロジェクト: LusciousPear/cascading
  /**
   * Runs an identity flow from a {@code CommentScheme} source into a {@code TextLine} sink,
   * checks the sink length and the value at position 1 of the first sink tuple, and finally
   * re-reads the source to confirm the tuple iterator can handle nulls from the scheme.
   *
   * @throws IOException if copying the input, running the flow, or reading the taps fails
   */
  public void testNullsFromScheme() throws IOException {
    if (!new File(inputFileComments).exists()) {
      fail("data file not found");
    }

    copyFromLocal(inputFileComments);

    Tap source = new Hfs(new CommentScheme(new Fields("line")), inputFileComments);

    Pipe pipe = new Pipe("test");

    pipe = new Each(pipe, new Identity());

    Tap sink = new Hfs(new TextLine(1), outputPath + "/testnulls", true);

    Flow flow = new FlowConnector(getProperties()).connect(source, sink, pipe);

    flow.complete();

    validateLength(flow, 5, null);

    TupleEntryIterator iterator = flow.openSink();

    // Close the iterator even when the assertion fails so the sink reader is not leaked.
    try {
      assertEquals("not equal: tuple.get(1)", "1 a", iterator.next().get(1));
    } finally {
      iterator.close();
    }

    // confirm the tuple iterator can handle nulls from the source
    validateLength(flow.openSource(), 5);
  }
コード例 #5
0
  /**
   * Writes two three-field tuples to a sequence file, runs a flow that applies {@code Extrude}
   * to {@code first} and {@code second} and retains {@code constant} and {@code output}, then
   * checks that the sink holds the four expected {@code constant<TAB>output} rows in order.
   *
   * @throws Exception if file system access or the flow fails
   */
  @Test
  public void testInFlow() throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    fs.delete(new Path(TMP_DIR), true);

    Hfs input =
        new Hfs(new SequenceFile(new Fields("constant", "first", "second")), TMP_DIR + "/inputs");
    TupleEntryCollector collector = input.openForWrite(new HadoopFlowProcess());
    collector.add(new Tuple("constant 1", "a", "b"));
    collector.add(new Tuple("constant 2", "c", "d"));
    collector.close();

    Hfs output = new Hfs(new SequenceFile(new Fields("constant", "output")), TMP_DIR + "/outputs");

    Pipe pipe =
        Pump.prime()
            .each(new Extrude("output"), "first", "second")
            .retain("constant", "output")
            .toPipe();
    FlowDef flow = new FlowDef().addSource("input", input).addTailSink(pipe, output);
    CascadingHelper.setTestMode();
    CascadingHelper.get().getFlowConnector().connect(flow).complete();

    List<String> results = new ArrayList<String>();
    TupleEntryIterator iterator = output.openForRead(new HadoopFlowProcess());
    // The original never closed this iterator; release it once the rows are collected.
    try {
      while (iterator.hasNext()) {
        TupleEntry tupleEntry = iterator.next();
        results.add(tupleEntry.getString(0) + "\t" + tupleEntry.getString(1));
      }
    } finally {
      iterator.close();
    }
    assertEquals(
        Arrays.asList("constant 1\ta", "constant 1\tb", "constant 2\tc", "constant 2\td"), results);
  }
コード例 #6
0
  /**
   * Asserts that the first line of {@code output}, read back as plain text through
   * {@code flow}, is the expected comma-separated header row.
   *
   * @param output tap whose identifier names the file to inspect
   * @param flow flow used to open the text tap for reading
   * @throws IOException if the tap cannot be opened or closed
   */
  private void assertHeaders(Tap output, Flow flow) throws IOException {
    TupleEntryIterator iterator =
        flow.openTapForRead(getPlatform().getTextFile(new Fields("line"), output.getIdentifier()));

    // Close the iterator even when the assertion fails so the reader is not leaked.
    try {
      // JUnit's convention is (expected, actual); the original had them swapped, which makes
      // the failure message report the two values the wrong way round.
      assertEquals("first,second,third,fourth,fifth", iterator.next().getObject(0));
    } finally {
      iterator.close();
    }
  }
コード例 #7
0
 /**
  * Reads every tuple currently stored in {@code sink} into a list.
  *
  * @param sink tap to read from
  * @return a new list holding a copy of each tuple, in iteration order
  * @throws IOException if the tap cannot be opened, read, or closed
  */
 protected List<Tuple> getAllTuples(Tap sink) throws IOException {
   List<Tuple> ret = Lists.newArrayList();
   TupleEntryIterator tupleEntryIterator = sink.openForRead(CascadingUtil.get().getFlowProcess());
   // The original never closed this iterator; release it whether or not reading succeeds.
   try {
     while (tupleEntryIterator.hasNext()) {
       // Copy each tuple via the Tuple copy constructor, as the original did, rather than
       // storing the instance handed out by the iterator.
       ret.add(new Tuple(tupleEntryIterator.next().getTuple()));
     }
   } finally {
     tupleEntryIterator.close();
   }
   return ret;
 }
  /**
   * Compiles the first rule, evaluates it, and checks that the resulting flow assembly's sink
   * contains exactly one tuple. Each tuple read is also logged at info level.
   *
   * @throws Exception if compilation, evaluation, or reading the sink fails
   */
  public void testEvaluation() throws Exception {
    CascadingRuleCompiler crc = new CascadingRuleCompiler(defaultConfiguration);
    IDistributedCompiledRule dcr = crc.compile(rules.get(0));
    dcr.evaluate(new EvaluationContext(1, 1, 1));
    FlowAssembly fa = dcr.getFlowAssembly();

    int size = 0;
    TupleEntryIterator tei = fa.openSink();
    // The original never closed this iterator; release it once counting is done.
    try {
      while (tei.hasNext()) {
        TupleEntry te = tei.next();
        logger.info(te.getTuple().toString());
        size++;
      }
    } finally {
      tei.close();
    }
    assertEquals(1, size);
  }
コード例 #9
0
  /**
   * Writes one {@code (name, age)} tuple to a randomly named path under the platform's temp
   * directory and verifies that exactly that one entry can be read back.
   *
   * @throws Exception if the tap cannot be written or read
   */
  @Test
  public void testTempPath() throws Exception {
    BasePlatform platform = new HadoopPlatform(HadoopPlatformTest.class);

    BasePath tempDir = platform.getTempDir();

    // Verify we can write and then read
    BasePath testDir = platform.makePath(tempDir, UUID.randomUUID().toString());

    Scheme scheme = platform.makeBinaryScheme(new Fields("name", "age"));
    Tap tap = platform.makeTap(scheme, testDir);
    TupleEntryCollector writer = tap.openForWrite(platform.makeFlowProcess());
    writer.add(new Tuple("ken", 37));
    writer.close();

    TupleEntryIterator iter = tap.openForRead(platform.makeFlowProcess());
    // Close the iterator even when an assertion fails so the reader is not leaked.
    try {
      assertTrue(iter.hasNext());
      TupleEntry te = iter.next();
      assertEquals("ken", te.getString("name"));
      assertFalse(iter.hasNext());
    } finally {
      iter.close();
    }
  }
コード例 #10
0
  /**
   * Runs the generate-terms workflow, reads every entry of its terms output into a
   * {@code WikiTermDatum}, and checks that the articles counter reports 15.
   *
   * @throws Exception if the workflow or reading its output fails
   */
  @Test
  public void test() throws Exception {
    GenerateTermsOptions options = generateTerms("build/test/GenerateTermsFlowTest/test");

    // Verify that we get expected results in the output
    BasePlatform platform = options.getPlatform(GenerateTermsFlowTest.class);
    Tap tap =
        platform.makeTap(
            platform.makeBinaryScheme(WikiTermDatum.FIELDS),
            options.getWorkingSubdirPath(WorkingConfig.TERMS_SUBDIR_NAME));
    TupleEntryIterator iter = tap.openForRead(platform.makeFlowProcess());
    WikiTermDatum datum = new WikiTermDatum();
    // The original never closed this iterator; release it once every entry has been visited.
    try {
      while (iter.hasNext()) {
        datum.setTupleEntry(iter.next());
        // TODO verify that each field looks correct?
        // System.out.println(datum.getTuple());
      }
    } finally {
      iter.close();
    }

    // Verify we got the expected number of results.
    Map<String, Long> counters = options.getCounters(GenerateTermsFlow.class);
    String counterName = WorkflowOptions.getFlowCounterName(WikiwordsCounters.ARTICLES);
    assertEquals(15, (long) counters.get(counterName));
  }
コード例 #11
0
  /**
   * Copies a quoted, delimited input file to an output path through a pass-through pipe and
   * checks that both the parsed source tuples and the written sink tuples match the expected
   * rows, in order.
   *
   * @param path name used to build the output path under {@code quoted/}
   * @param inputData location of the delimited input file
   * @param delimiter field delimiter used by both the input and the output
   * @param useAll when true, {@code Fields.ALL} is used with no declared types and every
   *     expected value is compared in its string form
   * @throws IOException if running the flow or reading the taps fails
   */
  private void runQuotedText(String path, String inputData, String delimiter, boolean useAll)
      throws IOException {
    Object[][] results =
        new Object[][] {
          {"foo", "bar", "baz", "bin", 1L},
          {"foo", "bar", "baz", "bin", 2L},
          {"foo", "bar" + delimiter + "bar", "baz", "bin", 3L},
          {"foo", "bar\"" + delimiter + "bar", "baz", "bin", 4L},
          {"foo", "bar\"\"" + delimiter + "bar", "baz", "bin", 5L},
          {null, null, "baz", null, 6L},
          {null, null, null, null, 7L},
          {"foo", null, null, null, 8L},
          {null, null, null, null, 9L},
          {"f", null, null, null, 10L}, // this one is quoted, single char
          {"f", null, null, ",bin", 11L},
          {"f", null, null, "bin,", 11L}
        };

    // With no declared types, the expected rows are compared as strings (nulls stay null).
    if (useAll) {
      for (Object[] result : results) {
        for (int j = 0; j < result.length; j++) {
          result[j] = result[j] != null ? result[j].toString() : null;
        }
      }
    }

    Tuple[] tuples = new Tuple[results.length];

    for (int i = 0; i < results.length; i++) {
      tuples[i] = new Tuple(results[i]);
    }

    Class[] types =
        new Class[] {String.class, String.class, String.class, String.class, long.class};
    Fields fields = new Fields("first", "second", "third", "fourth", "fifth");

    if (useAll) {
      types = null;
      fields = Fields.ALL;
    }

    Tap input =
        getPlatform()
            .getDelimitedFile(fields, false, delimiter, "\"", types, inputData, SinkMode.KEEP);
    Tap output =
        getPlatform()
            .getDelimitedFile(
                fields,
                false,
                delimiter,
                "\"",
                types,
                getOutputPath("quoted/" + path + useAll),
                SinkMode.REPLACE);

    Pipe pipe = new Pipe("pipe");

    Flow flow = getPlatform().getFlowConnector().connect(input, output, pipe);

    flow.complete();

    validateLength(flow, results.length, 5);

    // validate input parsing compares to expected, and results compare to expected
    TupleEntryIterator iterator = flow.openSource();

    // The original closed neither iterator; release each one even if a comparison fails.
    try {
      int count = 0;
      while (iterator.hasNext()) {
        Tuple tuple = iterator.next().getTuple();
        assertEquals(tuples[count++], tuple);
      }
    } finally {
      iterator.close();
    }

    iterator = flow.openSink();

    try {
      int count = 0;
      while (iterator.hasNext()) {
        Tuple tuple = iterator.next().getTuple();
        assertEquals(tuples[count++], tuple);
      }
    } finally {
      iterator.close();
    }
  }