@Test public void parseGroupSort() throws IOException { Plume p = new LocalPlume(); PCollection<String> logs = p.readResourceFile("log.txt"); PTable<String, Event> events = logs.map( new DoFn<String, Pair<String, Event>>() { @Override public void process(String logLine, EmitFn<Pair<String, Event>> emitter) { if (logLine.length() > 0) { Event e = new Event(logLine); emitter.emit(new Pair<String, Event>(e.getName(), e)); } } }, Plume.tableOf(strings(), strings())); Ordering<Event> ordering = new Ordering<Event>() { public int compare(Event left, Event right) { return left.compareTo(right); }; }; PTable<String, Iterable<Event>> byName = events.groupByKey(ordering); for (Pair<String, Iterable<Event>> logIter : byName) { // String nameKey = logIter.getKey(); Iterable<Event> chatEvents = logIter.getValue(); // check if ordering indeed happened in the result assertTrue(ordering.isOrdered(chatEvents)); } }
@SuppressWarnings("unchecked") @Override public void build() { init(); LazyPlume plume = new LazyPlume(); PCollection input; PCollection input2; try { // Read input input = plume.readFile("/tmp/input-wordcount.txt", collectionOf(strings())); input2 = plume.readFile("/tmp/input-moretext.txt", collectionOf(strings())); // Add it as workflow's input addInput(input); } catch (IOException e) { throw new RuntimeException(); } PCollection transform = input.map( new DoFn() { @Override public void process(Object v, EmitFn emitter) { Text t = (Text) v; emitter.emit(new Text(t.toString() + "-bar")); } }, collectionOf(strings())); addOutput(plume.flatten(input2, transform)); // flatten with another file PCollection groupedTransform = input .map( new DoFn() { @Override public void process(Object v, EmitFn emitter) { Text t = (Text) v; emitter.emit(Pair.create(t, new Text("foo"))); } }, tableOf(strings(), strings())) .groupByKey(); addOutput(groupedTransform); }
/** This test runs a chain of two "ParallelDo" operations: (x+1), (x*2) */ @Test public void testNestedMap() { // Get Plume runtime LazyPlume plume = new LazyPlume(); // Create simple data PCollection<Integer> input = plume.fromJava(Lists.newArrayList(1, 2, 3)); PCollection<Integer> output = input.map(plusOne, null).map(timesTwo, null); executeAndAssert((LazyCollection<Integer>) output, new Integer[] {4, 6, 8}); }