/**
 * Verifies that when LimitAdjuster appends an extra MR job after an
 * ORDER BY with parallel 2 followed by LIMIT, the chain grows to four
 * jobs and the final job still ships the store func spec, including its
 * constructor argument.
 */
@Test
public void testLimitAdjusterFuncShipped() throws Exception {
  String query =
      "a = load 'input';"
          + "b = order a by $0 parallel 2;"
          + "c = limit b 7;"
          + "store c into 'output' using "
          + PigStorageNoDefCtor.class.getName()
          + "('\t');";
  PhysicalPlan pp = Util.buildPp(pigServerMR, query);
  MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

  LimitAdjuster adjuster = new LimitAdjuster(mrPlan, pc);
  adjuster.visit();
  adjuster.adjust();

  // Walk the linear chain of MR operators and measure its length.
  MapReduceOper current = mrPlan.getRoots().get(0);
  int jobCount = 1;
  for (; mrPlan.getSuccessors(current) != null; jobCount++) {
    current = mrPlan.getSuccessors(current).get(0);
  }
  assertEquals(4, jobCount);

  // The last job must carry the store func (with its ctor arg) in UDFs.
  MapReduceOper lastJob = mrPlan.getLeaves().get(0);
  assertTrue(
      lastJob.UDFs.contains(new FuncSpec(PigStorageNoDefCtor.class.getName()) + "('\t')"));
}
/**
 * Verifies the two-job shape of a merge-join compile: the first job builds
 * the index over the right-hand input (load -> local rearrange in the map,
 * package -> foreach -> store in the reduce), and the second job performs
 * the map-side merge join (load -> merge join -> store, empty reduce).
 *
 * <p>Fix: equality checks now use {@code assertEquals} (expected, actual)
 * instead of {@code assertTrue(x == n)} so failures report the actual value.
 */
@Test
public void testMergeJoin() throws Exception {
  String query =
      "a = load '/tmp/input1';"
          + "b = load '/tmp/input2';"
          + "c = join a by $0, b by $0 using 'merge';"
          + "store c into '/tmp/output1';";
  PhysicalPlan pp = Util.buildPp(pigServer, query);
  MRCompiler comp = new MRCompiler(pp, pc);
  comp.compile();
  MROperPlan mrp = comp.getMRPlan();
  assertEquals(2, mrp.size());

  // ---- Job 0: index construction over the indexed input ----
  MapReduceOper mrOp0 = mrp.getRoots().get(0);
  assertEquals(2, mrOp0.mapPlan.size());
  PhysicalOperator load0 = mrOp0.mapPlan.getRoots().get(0);
  // The loader is wrapped in a MergeJoinIndexer; use reflection to reach
  // its private local-rearrange so the join-key plans can be inspected.
  MergeJoinIndexer func =
      (MergeJoinIndexer)
          PigContext.instantiateFuncFromSpec(((POLoad) load0).getLFile().getFuncSpec());
  Field lrField = MergeJoinIndexer.class.getDeclaredField("lr");
  lrField.setAccessible(true);
  POLocalRearrange lr = (POLocalRearrange) lrField.get(func);
  List<PhysicalPlan> innerPlans = lr.getPlans();
  PhysicalOperator localrearrange0 = mrOp0.mapPlan.getSuccessors(load0).get(0);
  assertTrue(localrearrange0 instanceof POLocalRearrange);

  assertEquals(3, mrOp0.reducePlan.size());
  PhysicalOperator pack0 = mrOp0.reducePlan.getRoots().get(0);
  assertTrue(pack0 instanceof POPackage);
  PhysicalOperator foreach0 = mrOp0.reducePlan.getSuccessors(pack0).get(0);
  assertTrue(foreach0 instanceof POForEach);
  PhysicalOperator store0 = mrOp0.reducePlan.getSuccessors(foreach0).get(0);
  assertTrue(store0 instanceof POStore);

  // The indexer's key plan is a single projection of column 0.
  assertEquals(1, innerPlans.size());
  PhysicalPlan innerPlan = innerPlans.get(0);
  assertEquals(1, innerPlan.size());
  PhysicalOperator project = innerPlan.getRoots().get(0);
  assertTrue(project instanceof POProject);
  assertEquals(0, ((POProject) project).getColumn());

  // ---- Job 1: the actual map-side merge join ----
  MapReduceOper mrOp1 = mrp.getSuccessors(mrOp0).get(0);
  assertEquals(3, mrOp1.mapPlan.size());
  PhysicalOperator load1 = mrOp1.mapPlan.getRoots().get(0);
  assertTrue(load1 instanceof POLoad);
  PhysicalOperator mergejoin1 = mrOp1.mapPlan.getSuccessors(load1).get(0);
  assertTrue(mergejoin1 instanceof POMergeJoin);
  PhysicalOperator store1 = mrOp1.mapPlan.getSuccessors(mergejoin1).get(0);
  assertTrue(store1 instanceof POStore);
  // Merge join is entirely map-side; no reduce phase in job 1.
  assertTrue(mrOp1.reducePlan.isEmpty());
}
/**
 * Test to ensure that the order by without parallel followed by a limit, i.e., top k always
 * produces the correct number of map reduce jobs. In the testcase below since we are running the
 * unit test locally, we will get reduce parallelism as 1. So we will NOT introduce the extra MR
 * job to do a final limit
 */
@Test
public void testNumReducersInLimit() throws Exception {
  String query =
      "a = load 'input';"
          + "b = order a by $0;"
          + "c = limit b 10;"
          + "store c into 'output';";
  PhysicalPlan pp = Util.buildPp(pigServer, query);
  MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

  // Count the jobs in the (linear) MR plan chain.
  MapReduceOper current = mrPlan.getRoots().get(0);
  int jobCount = 1;
  for (; mrPlan.getSuccessors(current) != null; jobCount++) {
    current = mrPlan.getSuccessors(current).get(0);
  }
  assertEquals(3, jobCount);
}
/**
 * Verifies that a custom IndexableLoadFunc used on the right side of a
 * merge join is registered in the root MR operator's UDF list, so it gets
 * shipped to the cluster.
 */
@Test
public void testUDFInMergedJoin() throws Exception {
  String query =
      "a = load 'input1';"
          + "b = load 'input2' using "
          + TestIndexableLoadFunc.class.getName()
          + "();"
          + "c = join a by $0, b by $0 using 'merge';"
          + "store c into 'output';";
  PhysicalPlan physicalPlan = Util.buildPp(pigServer, query);
  MROperPlan plan = Util.buildMRPlan(physicalPlan, pc);
  MapReduceOper rootOper = plan.getRoots().get(0);
  assertTrue(rootOper.UDFs.contains(TestIndexableLoadFunc.class.getName()));
}
/**
 * Verifies that both load functions of a join (the explicit BinStorage and
 * the default PigStorage) are registered in the MR operator's UDF list.
 *
 * <p>Fix: removed a verbatim duplicate of the {@code assertEquals} on the
 * UDF-list size.
 */
@Test
public void testUDFInJoin() throws Exception {
  String query =
      "a = load 'input1' using BinStorage();"
          + "b = load 'input2';"
          + "c = join a by $0, b by $0;"
          + "store c into 'output';";
  PhysicalPlan pp = Util.buildPp(pigServer, query);
  MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
  MapReduceOper mrOper = mrPlan.getRoots().get(0);

  assertEquals(2, mrOper.UDFs.size());
  assertTrue(mrOper.UDFs.contains("BinStorage"));
  assertTrue(mrOper.UDFs.contains("org.apache.pig.builtin.PigStorage"));
}
/**
 * Test to ensure that the order by with parallel followed by a limit, i.e., top k always produces
 * the correct number of map reduce jobs
 *
 * <p>Fix: the final count check now uses {@code assertEquals(4, count)} instead of
 * {@code assertTrue(count == 4)}, matching the sibling tests and reporting the actual
 * job count on failure.
 */
@Test
public void testNumReducersInLimitWithParallel() throws Exception {
  planTester.buildPlan("a = load 'input';");
  planTester.buildPlan("b = order a by $0 parallel 2;");
  planTester.buildPlan("c = limit b 10;");
  LogicalPlan lp = planTester.buildPlan("store c into '/tmp';");
  PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
  MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

  // Count the jobs in the (linear) MR plan chain.
  MapReduceOper mrOper = mrPlan.getRoots().get(0);
  int count = 1;
  while (mrPlan.getSuccessors(mrOper) != null) {
    mrOper = mrPlan.getSuccessors(mrOper).get(0);
    ++count;
  }
  assertEquals(4, count);
}
// PIG-2146 @Test public void testStorerLimit() throws Exception { // test if the POStore in the 1st mr plan // use the right StoreFunc String query = "a = load 'input1';" + "b = limit a 10;" + "store b into 'output' using " + PigStorageNoDefCtor.class.getName() + "(',');"; PhysicalPlan pp = Util.buildPp(pigServer, query); MROperPlan mrPlan = Util.buildMRPlan(pp, pc); LimitAdjuster la = new LimitAdjuster(mrPlan, pc); la.visit(); la.adjust(); MapReduceOper firstMrOper = mrPlan.getRoots().get(0); POStore store = (POStore) firstMrOper.reducePlan.getLeaves().get(0); assertEquals(store.getStoreFunc().getClass().getName(), "org.apache.pig.impl.io.InterStorage"); }