/** Try one deferred flatten (1,2,3)+(4,5,6) */ @Test public void testSimpleFlatten() { // Get Plume runtime LazyPlume plume = new LazyPlume(); List<Integer> l1 = Lists.newArrayList(1, 2, 3); List<Integer> l2 = Lists.newArrayList(4, 5, 6); PCollection<Integer> output = plume.flatten(plume.fromJava(l1), plume.fromJava(l2)); executeAndAssert((LazyCollection<Integer>) output, new Integer[] {1, 2, 3, 4, 5, 6}); }
/** This test runs a chain of two "ParallelDo" operations: (x+1), (x*2) */ @Test public void testNestedMap() { // Get Plume runtime LazyPlume plume = new LazyPlume(); // Create simple data PCollection<Integer> input = plume.fromJava(Lists.newArrayList(1, 2, 3)); PCollection<Integer> output = input.map(plusOne, null).map(timesTwo, null); executeAndAssert((LazyCollection<Integer>) output, new Integer[] {4, 6, 8}); }
/** Deferred execution of ((1,2,3)+(4,5,6)) => x+1 */ @Test public void testMapAndFlatten() { List<Integer> l1 = Lists.newArrayList(1, 2, 3); List<Integer> l2 = Lists.newArrayList(4, 5, 6); // Get Plume runtime LazyPlume plume = new LazyPlume(); PCollection<Integer> output = plume.flatten(plume.fromJava(l1), plume.fromJava(l2)).map(plusOne, null); executeAndAssert((LazyCollection<Integer>) output, new Integer[] {2, 3, 4, 5, 6, 7}); }
/** * Group by and combine adding all values from table (1,2),(1,3),(2,4),(2,6),(3,6),(3,9) Should * raise result (1,(2+3)),(2,(4+6)),(3,(6+9)) = (1,5),(2,10),(3,15) */ @Test public void testCombine() { DoFn<Integer, Pair<Integer, Integer>> fn = new DoFn<Integer, Pair<Integer, Integer>>() { @Override public void process(Integer v, EmitFn<Pair<Integer, Integer>> emitter) { emitter.emit(Pair.create(v, v * 2)); emitter.emit(Pair.create(v, v * 3)); } }; // Get Plume runtime LazyPlume plume = new LazyPlume(); List<Integer> l1 = Lists.newArrayList(1, 2, 3); PTable<Integer, Integer> output = plume .fromJava(l1) .map(fn, tableOf(integers(), integers())) .groupByKey() .combine( new CombinerFn<Integer>() { @Override public Integer combine(Iterable<Integer> stuff) { Integer result = 0; for (Integer i : stuff) { result += i; } return result; } }); // Get an executor LocalExecutor executor = new LocalExecutor(); LazyTable<Integer, Integer> lOutput = (LazyTable<Integer, Integer>) output; Iterable<Pair<Integer, Integer>> executedOutput = executor.execute(lOutput); List<Pair<Integer, Integer>> outputList = Lists.newArrayList(executedOutput); Collections.sort( outputList, new Comparator<Pair<Integer, ?>>() { @Override public int compare(Pair<Integer, ?> arg0, Pair<Integer, ?> arg1) { return arg0.getKey().compareTo(arg1.getKey()); } }); assertEquals(outputList.get(0).getKey().intValue(), 1); assertEquals(outputList.get(0).getValue().intValue(), 5); assertEquals(outputList.get(1).getKey().intValue(), 2); assertEquals(outputList.get(1).getValue().intValue(), 10); assertEquals(outputList.get(2).getKey().intValue(), 3); assertEquals(outputList.get(2).getValue().intValue(), 15); }
@SuppressWarnings("unchecked") @Override public void build() { init(); LazyPlume plume = new LazyPlume(); PCollection input; PCollection input2; try { // Read input input = plume.readFile("/tmp/input-wordcount.txt", collectionOf(strings())); input2 = plume.readFile("/tmp/input-moretext.txt", collectionOf(strings())); // Add it as workflow's input addInput(input); } catch (IOException e) { throw new RuntimeException(); } PCollection transform = input.map( new DoFn() { @Override public void process(Object v, EmitFn emitter) { Text t = (Text) v; emitter.emit(new Text(t.toString() + "-bar")); } }, collectionOf(strings())); addOutput(plume.flatten(input2, transform)); // flatten with another file PCollection groupedTransform = input .map( new DoFn() { @Override public void process(Object v, EmitFn emitter) { Text t = (Text) v; emitter.emit(Pair.create(t, new Text("foo"))); } }, tableOf(strings(), strings())) .groupByKey(); addOutput(groupedTransform); }
/** Try one group by from table (1,2),(1,3),(2,4),(2,6),(3,6),(3,9) */ @Test public void testGroupByKey() { // Get Plume runtime LazyPlume plume = new LazyPlume(); List<Integer> l1 = Lists.newArrayList(1, 2, 3); PTable<Integer, Iterable<Integer>> output = plume.fromJava(l1).map(plusTwoPlusThree, tableOf(integers(), integers())).groupByKey(); LazyTable<Integer, Iterable<Integer>> lOutput = (LazyTable<Integer, Iterable<Integer>>) output; // Get an executor LocalExecutor executor = new LocalExecutor(); Iterable<Pair<Integer, Iterable<Integer>>> executedOutput = executor.execute(lOutput); List<Pair<Integer, Iterable<Integer>>> outputList = Lists.newArrayList(executedOutput); Collections.sort( outputList, new Comparator<Pair<Integer, ?>>() { @Override public int compare(Pair<Integer, ?> arg0, Pair<Integer, ?> arg1) { return arg0.getKey().compareTo(arg1.getKey()); } }); assertEquals(outputList.get(0).getKey().intValue(), 1); assertEquals(outputList.get(1).getKey().intValue(), 2); assertEquals(outputList.get(2).getKey().intValue(), 3); List<Integer> lR1 = Lists.newArrayList(outputList.get(0).getValue()); List<Integer> lR2 = Lists.newArrayList(outputList.get(1).getValue()); List<Integer> lR3 = Lists.newArrayList(outputList.get(2).getValue()); Collections.sort(lR1); Collections.sort(lR2); Collections.sort(lR3); assertEquals(lR1.get(0).intValue(), 2); assertEquals(lR1.get(1).intValue(), 3); assertEquals(lR2.get(0).intValue(), 4); assertEquals(lR2.get(1).intValue(), 6); assertEquals(lR3.get(0).intValue(), 6); assertEquals(lR3.get(1).intValue(), 9); }