/**
 * Compiles the given physical plan with {@link MRCompiler}, prints the resulting
 * map-reduce plan, and compares the printed text against a golden file.
 *
 * <p>When the {@code generate} flag is set, the printed plan is written to
 * {@code expectedFile} instead and no comparison is performed.
 *
 * @param pp           physical plan to compile
 * @param expectedFile path of the golden file to compare against (or to write
 *                     when {@code generate} is set)
 * @throws Exception if compilation, printing, or file I/O fails
 */
private void run(PhysicalPlan pp, String expectedFile) throws Exception {
    MRCompiler comp = new MRCompiler(pp, pc);
    comp.compile();
    MROperPlan mrp = comp.getMRPlan();

    PlanPrinter ppp = new PlanPrinter(mrp);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ppp.print(baos);
    String compiledPlan = baos.toString();

    if (generate) {
        // Close the stream even if the write fails so the file handle is not leaked.
        FileOutputStream fos = new FileOutputStream(expectedFile);
        try {
            fos.write(baos.toByteArray());
        } finally {
            fos.close();
        }
        return;
    }

    // Read the whole golden file: a single read() may return fewer bytes than
    // are available, and returns -1 for an empty file.
    ByteArrayOutputStream goldenBytes = new ByteArrayOutputStream();
    FileInputStream fis = new FileInputStream(expectedFile);
    try {
        byte[] buf = new byte[4096];
        int n;
        while ((n = fis.read(buf)) != -1) {
            goldenBytes.write(buf, 0, n);
        }
    } finally {
        fis.close();
    }

    // Decoded with the platform default charset, the same way compiledPlan is.
    String goldenPlan = goldenBytes.toString();
    // Drop a single trailing newline; index by String length, not byte count.
    if (goldenPlan.endsWith("\n")) {
        goldenPlan = goldenPlan.substring(0, goldenPlan.length() - 1);
    }

    pp.explain(System.out);
    System.out.println();
    System.out.println("<<<" + compiledPlan + ">>>");
    System.out.println("-------------");
    System.out.println("Golden");
    System.out.println("<<<" + goldenPlan + ">>>");
    System.out.println("-------------");
    assertEquals(goldenPlan, compiledPlan);
}
/**
 * Compiles a two-input {@code join ... using 'merge'} query and checks the
 * shape of the resulting map-reduce plan.
 *
 * <p>Expected shape, as asserted below:
 * <ul>
 *   <li>two map-reduce operators in total;</li>
 *   <li>first operator: map plan of size 2 (load followed by a local rearrange)
 *       and reduce plan of size 3 (package, foreach, store);</li>
 *   <li>the first load's function spec instantiates a {@link MergeJoinIndexer}
 *       whose {@code lr} field holds one inner plan consisting of a single
 *       projection of column 0;</li>
 *   <li>second operator: map plan of size 3 (load, merge join, store) and an
 *       empty reduce plan.</li>
 * </ul>
 *
 * @throws Exception if plan building, compilation, or reflection fails
 */
@Test
public void testMergeJoin() throws Exception {
    String script =
            "a = load '/tmp/input1';"
            + "b = load '/tmp/input2';"
            + "c = join a by $0, b by $0 using 'merge';"
            + "store c into '/tmp/output1';";
    PhysicalPlan physicalPlan = Util.buildPp(pigServer, script);

    MRCompiler compiler = new MRCompiler(physicalPlan, pc);
    compiler.compile();
    MROperPlan mrPlan = compiler.getMRPlan();
    assertTrue(mrPlan.size() == 2);

    // ---- first map-reduce operator: map side ----
    MapReduceOper firstJob = mrPlan.getRoots().get(0);
    assertTrue(firstJob.mapPlan.size() == 2);
    PhysicalOperator firstLoad = firstJob.mapPlan.getRoots().get(0);

    // The loader function is expected to be a MergeJoinIndexer; its "lr" field
    // is not accessible directly, so read it reflectively.
    POLoad firstLoadAsPoLoad = (POLoad) firstLoad;
    MergeJoinIndexer indexer = (MergeJoinIndexer) PigContext.instantiateFuncFromSpec(
            firstLoadAsPoLoad.getLFile().getFuncSpec());
    Field rearrangeField = MergeJoinIndexer.class.getDeclaredField("lr");
    rearrangeField.setAccessible(true);
    POLocalRearrange indexerRearrange = (POLocalRearrange) rearrangeField.get(indexer);
    List<PhysicalPlan> keyPlans = indexerRearrange.getPlans();

    PhysicalOperator afterFirstLoad = firstJob.mapPlan.getSuccessors(firstLoad).get(0);
    assertTrue(afterFirstLoad instanceof POLocalRearrange);

    // ---- first map-reduce operator: reduce side ----
    assertTrue(firstJob.reducePlan.size() == 3);
    PhysicalOperator reduceRoot = firstJob.reducePlan.getRoots().get(0);
    assertTrue(reduceRoot instanceof POPackage);
    PhysicalOperator afterPackage = firstJob.reducePlan.getSuccessors(reduceRoot).get(0);
    assertTrue(afterPackage instanceof POForEach);
    PhysicalOperator afterForEach = firstJob.reducePlan.getSuccessors(afterPackage).get(0);
    assertTrue(afterForEach instanceof POStore);

    // ---- inner plan held by the indexer's local rearrange ----
    assertTrue(keyPlans.size() == 1);
    PhysicalPlan keyPlan = keyPlans.get(0);
    assertTrue(keyPlan.size() == 1);
    PhysicalOperator keyRoot = keyPlan.getRoots().get(0);
    assertTrue(keyRoot instanceof POProject);
    assertTrue(((POProject) keyRoot).getColumn() == 0);

    // ---- second map-reduce operator: map-only ----
    MapReduceOper secondJob = mrPlan.getSuccessors(firstJob).get(0);
    assertTrue(secondJob.mapPlan.size() == 3);
    PhysicalOperator secondLoad = secondJob.mapPlan.getRoots().get(0);
    assertTrue(secondLoad instanceof POLoad);
    PhysicalOperator afterSecondLoad = secondJob.mapPlan.getSuccessors(secondLoad).get(0);
    assertTrue(afterSecondLoad instanceof POMergeJoin);
    PhysicalOperator afterMergeJoin = secondJob.mapPlan.getSuccessors(afterSecondLoad).get(0);
    assertTrue(afterMergeJoin instanceof POStore);
    assertTrue(secondJob.reducePlan.isEmpty());
}