/**
 * Changes the algebraic function type of the algebraic functions in the map and combine
 * foreach. In map and combine the algebraic functions are the leaves of the inner plans.
 *
 * <p>Inner plans whose single leaf is a {@code POProject} are left untouched.
 *
 * @param fe foreach whose inner plans are inspected
 * @param type algebraic function type handed to {@code POUserFunc.setAlgebraicFunction}
 * @throws PlanException with error code 2019 if an inner plan does not have exactly one leaf,
 *     2020 if that leaf is neither a project nor a UDF, and 2075 if the type cannot be set
 */
private static void changeFunc(POForEach fe, byte type) throws PlanException {
  for (PhysicalPlan plan : fe.getInputPlans()) {
    List<PhysicalOperator> leaves = plan.getLeaves();
    if (leaves == null || leaves.size() != 1) {
      // Compute the count defensively: dereferencing a null leaf list while building the
      // message would surface as a NullPointerException instead of the PlanException.
      int leafCount = (leaves == null) ? 0 : leaves.size();
      int errCode = 2019;
      String msg = "Expected to find plan with single leaf. Found " + leafCount + " leaves.";
      throw new PlanException(msg, errCode, PigException.BUG);
    }

    PhysicalOperator leaf = leaves.get(0);
    if (leaf instanceof POProject) {
      // Plain projections carry no algebraic function to retarget.
      continue;
    }
    if (!(leaf instanceof POUserFunc)) {
      int errCode = 2020;
      String msg =
          "Expected to find plan with UDF or project leaf. Found "
              + leaf.getClass().getSimpleName();
      throw new PlanException(msg, errCode, PigException.BUG);
    }

    POUserFunc func = (POUserFunc) leaf;
    try {
      func.setAlgebraicFunction(type);
    } catch (ExecException e) {
      int errCode = 2075;
      String msg = "Could not set algebraic function type.";
      throw new PlanException(msg, errCode, PigException.BUG, e);
    }
  }
}
/** * add algebraic functions with appropriate projection to new foreach in combiner * * @param cfe - the new foreach in combiner * @param op2newpos - mapping of physical operator to position in input * @throws CloneNotSupportedException * @throws PlanException */ private static void addAlgebraicFuncToCombineFE( POForEach cfe, Map<PhysicalOperator, Integer> op2newpos) throws CloneNotSupportedException, PlanException { // an array that we will first populate with physical operators in order // of their position in input. Used while adding plans to combine // foreach just so that output of combine foreach same positions as // input. That means the same operator to position mapping can be used // by reduce as well PhysicalOperator[] opsInOrder = new PhysicalOperator[op2newpos.size() + 1]; for (Map.Entry<PhysicalOperator, Integer> op2pos : op2newpos.entrySet()) { opsInOrder[op2pos.getValue()] = op2pos.getKey(); } // first position is used by group column and a plan has been added for // it, so start with 1 for (int i = 1; i < opsInOrder.length; i++) { // create new inner plan for foreach add cloned copy of given // physical operator and a new project. Even if the udf in query // takes multiple input, only one project needs to be added because // input to this udf will be the INITIAL version of udf evaluated in // map. PhysicalPlan newPlan = new PhysicalPlan(); PhysicalOperator newOp = opsInOrder[i].clone(); newPlan.add(newOp); POProject proj = new POProject(createOperatorKey(cfe.getOperatorKey().getScope()), 1, i); proj.setResultType(DataType.BAG); newPlan.add(proj); newPlan.connect(proj, newOp); cfe.addInputPlan(newPlan, false); } }
/** Configures the Reduce plan, the POPackage operator and the reporter thread */ @SuppressWarnings("unchecked") @Override protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); inIllustrator = inIllustrator(context); if (inIllustrator) pack = getPack(context); Configuration jConf = context.getConfiguration(); SpillableMemoryManager.configure(ConfigurationUtil.toProperties(jConf)); context .getConfiguration() .set( PigConstants.TASK_INDEX, Integer.toString(context.getTaskAttemptID().getTaskID().getId())); sJobContext = context; sJobConfInternal.set(context.getConfiguration()); sJobConf = context.getConfiguration(); try { PigContext.setPackageImportList( (ArrayList<String>) ObjectSerializer.deserialize(jConf.get("udf.import.list"))); pigContext = (PigContext) ObjectSerializer.deserialize(jConf.get("pig.pigContext")); // This attempts to fetch all of the generated code from the distributed cache, and resolve // it SchemaTupleBackend.initialize(jConf, pigContext); if (rp == null) rp = (PhysicalPlan) ObjectSerializer.deserialize(jConf.get("pig.reducePlan")); stores = PlanHelper.getPhysicalOperators(rp, POStore.class); if (!inIllustrator) pack = (POPackage) ObjectSerializer.deserialize(jConf.get("pig.reduce.package")); // To be removed if (rp.isEmpty()) log.debug("Reduce Plan empty!"); else { ByteArrayOutputStream baos = new ByteArrayOutputStream(); rp.explain(baos); log.debug(baos.toString()); } pigReporter = new ProgressableReporter(); if (!(rp.isEmpty())) { roots = rp.getRoots().toArray(new PhysicalOperator[1]); leaf = rp.getLeaves().get(0); } // Get the UDF specific context MapRedUtil.setupUDFContext(jConf); } catch (IOException ioe) { String msg = "Problem while configuring reduce plan."; throw new RuntimeException(msg, ioe); } log.info( "Aliases being processed per job phase (AliasName[line,offset]): " + jConf.get("pig.alias.location")); Utils.setDefaultTimeZone(PigMapReduce.sJobConfInternal.get()); }
/**
 * Compiles a plan made of two {@code loadedGrpChain} sub-plans, each extended with its own
 * local rearrange, followed by a global rearrange, a package and a store, and compares the
 * result with golden file MRC6.gld.
 */
@Test
public void testSim6() throws Exception {
  PhysicalPlan plan = new PhysicalPlan();

  PhysicalPlan firstChain = GenPhyOp.loadedGrpChain();
  PhysicalPlan secondChain = GenPhyOp.loadedGrpChain();

  POLocalRearrange firstRearrange = GenPhyOp.topLocalRearrangeOp();
  POLocalRearrange secondRearrange = GenPhyOp.topLocalRearrangeOp();

  firstChain.addAsLeaf(firstRearrange);
  secondChain.addAsLeaf(secondRearrange);

  plan.merge(firstChain);
  plan.merge(secondChain);

  // Shared tail: global rearrange -> package -> store.
  POGlobalRearrange globalRearrange = GenPhyOp.topGlobalRearrangeOp();
  plan.addAsLeaf(globalRearrange);

  POPackage pkg = GenPhyOp.topPackageOp();
  plan.addAsLeaf(pkg);

  POStore store = GenPhyOp.topStoreOp();
  plan.addAsLeaf(store);

  run(plan, "test/org/apache/pig/test/data/GoldenFiles/MRC6.gld");
}
/**
 * Expects {@code Util.buildMRPlan} to reject an empty physical plan with an
 * {@code MRCompilerException} carrying error code 2053.
 *
 * <p>NOTE(review): the plan built from the query is modified but never handed to the
 * compiler; a fresh empty plan is compiled instead.
 */
@Test(expected = MRCompilerException.class)
public void testMRCompilerErr() throws Exception {
  String script = "a = load 'input';b = filter a by $0 > 5;store b into 'output';";

  PhysicalPlan physicalPlan = Util.buildPp(pigServer, script);
  physicalPlan.remove(physicalPlan.getRoots().get(0));

  try {
    Util.buildMRPlan(new PhysicalPlan(), pc);
  } catch (MRCompilerException expected) {
    // Check the error code, then rethrow so the expected-exception rule is satisfied.
    assertEquals(2053, expected.getErrorCode());
    throw expected;
  }
}
/**
 * Compiles a merge join and checks the shape of the two resulting map-reduce operators: the
 * first with a load / local-rearrange map plan and a package / foreach / store reduce plan,
 * the second with a map-only load / merge-join / store plan.
 */
@Test
public void testMergeJoin() throws Exception {
  String script =
      "a = load '/tmp/input1';"
          + "b = load '/tmp/input2';"
          + "c = join a by $0, b by $0 using 'merge';"
          + "store c into '/tmp/output1';";

  PhysicalPlan physicalPlan = Util.buildPp(pigServer, script);
  MRCompiler compiler = new MRCompiler(physicalPlan, pc);
  compiler.compile();
  MROperPlan mrPlan = compiler.getMRPlan();
  assertTrue(mrPlan.size() == 2);

  // ---- first map-reduce operator ----
  MapReduceOper firstJob = mrPlan.getRoots().get(0);
  assertTrue(firstJob.mapPlan.size() == 2);

  PhysicalOperator firstLoad = firstJob.mapPlan.getRoots().get(0);
  MergeJoinIndexer indexer =
      (MergeJoinIndexer)
          PigContext.instantiateFuncFromSpec(((POLoad) firstLoad).getLFile().getFuncSpec());

  // The indexer's local rearrange is held in a field named "lr"; read it reflectively.
  Field rearrangeField = MergeJoinIndexer.class.getDeclaredField("lr");
  rearrangeField.setAccessible(true);
  POLocalRearrange indexerRearrange = (POLocalRearrange) rearrangeField.get(indexer);
  List<PhysicalPlan> keyPlans = indexerRearrange.getPlans();

  PhysicalOperator mapRearrange = firstJob.mapPlan.getSuccessors(firstLoad).get(0);
  assertTrue(mapRearrange instanceof POLocalRearrange);

  assertTrue(firstJob.reducePlan.size() == 3);
  PhysicalOperator reducePackage = firstJob.reducePlan.getRoots().get(0);
  assertTrue(reducePackage instanceof POPackage);
  PhysicalOperator reduceForEach = firstJob.reducePlan.getSuccessors(reducePackage).get(0);
  assertTrue(reduceForEach instanceof POForEach);
  PhysicalOperator reduceStore = firstJob.reducePlan.getSuccessors(reduceForEach).get(0);
  assertTrue(reduceStore instanceof POStore);

  // The indexer's rearrange holds a single plan that projects column 0.
  assertTrue(keyPlans.size() == 1);
  PhysicalPlan keyPlan = keyPlans.get(0);
  assertTrue(keyPlan.size() == 1);
  PhysicalOperator keyProject = keyPlan.getRoots().get(0);
  assertTrue(keyProject instanceof POProject);
  assertTrue(((POProject) keyProject).getColumn() == 0);

  // ---- second map-reduce operator ----
  MapReduceOper secondJob = mrPlan.getSuccessors(firstJob).get(0);
  assertTrue(secondJob.mapPlan.size() == 3);

  PhysicalOperator secondLoad = secondJob.mapPlan.getRoots().get(0);
  assertTrue(secondLoad instanceof POLoad);
  PhysicalOperator mergeJoin = secondJob.mapPlan.getSuccessors(secondLoad).get(0);
  assertTrue(mergeJoin instanceof POMergeJoin);
  PhysicalOperator finalStore = secondJob.mapPlan.getSuccessors(mergeJoin).get(0);
  assertTrue(finalStore instanceof POStore);

  assertTrue(secondJob.reducePlan.isEmpty());
}
/** * Replace old POLocalRearrange with new pre-combine LR, add new map foreach, new * map-local-rearrange, and connect them * * @param mapPlan * @param preCombinerLR * @param mfe * @param mapAgg * @param mlr * @throws PlanException */ private static void patchUpMap( PhysicalPlan mapPlan, POPreCombinerLocalRearrange preCombinerLR, POForEach mfe, POPartialAgg mapAgg, POLocalRearrange mlr) throws PlanException { POLocalRearrange oldLR = (POLocalRearrange) mapPlan.getLeaves().get(0); mapPlan.replace(oldLR, preCombinerLR); mapPlan.add(mfe); mapPlan.connect(preCombinerLR, mfe); // the operator before local rearrange PhysicalOperator opBeforeLR = mfe; if (mapAgg != null) { mapPlan.add(mapAgg); mapPlan.connect(mfe, mapAgg); opBeforeLR = mapAgg; } mapPlan.add(mlr); mapPlan.connect(opBeforeLR, mlr); }
/**
 * Recursively clones {@code op} and all of its predecessors in {@code pplan}, adding the
 * clones to {@code newplan} and connecting them the same way.
 *
 * @param op operator to clone
 * @param pplan plan in which the predecessors of {@code op} are looked up
 * @param newplan plan receiving the cloned operators
 * @return the clone of {@code op} that was added to {@code newplan}
 * @throws CloneNotSupportedException if an operator cannot be cloned
 * @throws PlanException if the clones cannot be connected
 */
private static PhysicalOperator addPredecessorsToPlan(
    PhysicalOperator op, PhysicalPlan pplan, PhysicalPlan newplan)
    throws CloneNotSupportedException, PlanException {
  PhysicalOperator clonedOp = op.clone();
  newplan.add(clonedOp);

  List<PhysicalOperator> predecessors = pplan.getPredecessors(op);
  if (predecessors != null) {
    // An empty list simply skips the loop, matching the "no predecessors" case.
    for (PhysicalOperator predecessor : predecessors) {
      PhysicalOperator clonedPredecessor = addPredecessorsToPlan(predecessor, pplan, newplan);
      newplan.connect(clonedPredecessor, clonedOp);
    }
  }
  return clonedOp;
}
/** * Create a new foreach with same scope,alias as given foreach add an inner plan that projects the * group column, which is going to be the first input * * @param foreach source foreach * @param keyType type for group-by key * @return new POForeach */ private static POForEach createForEachWithGrpProj(POForEach foreach, byte keyType) { String scope = foreach.getOperatorKey().scope; POForEach newFE = new POForEach(createOperatorKey(scope), new ArrayList<PhysicalPlan>()); newFE.addOriginalLocation(foreach.getAlias(), foreach.getOriginalLocations()); newFE.setResultType(foreach.getResultType()); // create plan that projects the group column PhysicalPlan grpProjPlan = new PhysicalPlan(); // group by column is the first column POProject proj = new POProject(createOperatorKey(scope), 1, 0); proj.setResultType(keyType); grpProjPlan.add(proj); newFE.addInputPlan(grpProjPlan, false); return newFE; }
/**
 * Expects {@code Util.buildMRPlan} to fail on an empty physical plan with error code 2053.
 *
 * <p>NOTE(review): the physical plan derived from the script is modified but not passed to
 * the compiler; a fresh empty plan is compiled instead.
 */
@Test
public void testMRCompilerErr() throws Exception {
  planTester.buildPlan("a = load 'input';");
  LogicalPlan logicalPlan = planTester.buildPlan("b = filter a by $0 > 5;");

  PhysicalPlan physicalPlan = Util.buildPhysicalPlan(logicalPlan, pc);
  physicalPlan.remove(physicalPlan.getRoots().get(0));

  try {
    Util.buildMRPlan(new PhysicalPlan(), pc);
    fail("Expected failure.");
  } catch (MRCompilerException expected) {
    assertTrue(expected.getErrorCode() == 2053);
  }
}
/** Compiles a load -> limit -> store plan and compares it with golden file MRC17.gld. */
public void testLimit() throws Exception {
  PhysicalPlan plan = new PhysicalPlan();

  POLoad load = GenPhyOp.topLoadOp();
  plan.add(load);

  POLimit limit = new POLimit(new OperatorKey("", r.nextLong()), -1, null);
  plan.add(limit);
  plan.connect(load, limit);

  POStore store = GenPhyOp.topStoreOp();
  plan.addAsLeaf(store);

  run(plan, "test/org/apache/pig/test/data/GoldenFiles/MRC17.gld");
}
/**
 * Compiles two {@code loadedFilter} sub-plans joined by a union and followed by a store, and
 * compares the result with golden file MRC5.gld.
 */
public void testSim5() throws Exception {
  PhysicalPlan plan = new PhysicalPlan();

  PhysicalPlan firstBranch = GenPhyOp.loadedFilter();
  PhysicalPlan secondBranch = GenPhyOp.loadedFilter();
  plan.merge(firstBranch);
  plan.merge(secondBranch);

  POUnion union = GenPhyOp.topUnionOp();
  plan.addAsLeaf(union);

  POStore store = GenPhyOp.topStoreOp();
  plan.add(store);
  plan.connect(union, store);

  run(plan, "test/org/apache/pig/test/data/GoldenFiles/MRC5.gld");
}
// return: false-more output // true- end of processing public boolean processOnePackageOutput(Context oc) throws IOException, InterruptedException { Result res = pack.getNextTuple(); if (res.returnStatus == POStatus.STATUS_OK) { Tuple packRes = (Tuple) res.result; if (rp.isEmpty()) { oc.write(null, packRes); return false; } for (int i = 0; i < roots.length; i++) { roots[i].attachInput(packRes); } runPipeline(leaf); } if (res.returnStatus == POStatus.STATUS_NULL) { return false; } if (res.returnStatus == POStatus.STATUS_ERR) { int errCode = 2093; String msg = "Encountered error in package operator while processing group."; throw new ExecException(msg, errCode, PigException.BUG); } if (res.returnStatus == POStatus.STATUS_EOP) { return true; } return false; }
/**
 * Expects compilation of a {@code loadedFilter} plan ending in a split to fail with an
 * {@code MRCompilerException} carrying error code 2025.
 */
@Test(expected = MRCompilerException.class)
public void testMRCompilerErr1() throws Exception {
  PhysicalPlan plan = new PhysicalPlan();
  PhysicalPlan loadFilter = GenPhyOp.loadedFilter();
  plan.merge(loadFilter);

  POSplit split = GenPhyOp.topSplitOp();
  plan.addAsLeaf(split);

  try {
    Util.buildMRPlan(plan, pc);
  } catch (MRCompilerException expected) {
    // Check the error code, then rethrow so the expected-exception rule is satisfied.
    assertEquals(2025, expected.getErrorCode());
    throw expected;
  }
}
/**
 * Compiles the physical plan into a map-reduce plan, prints it, and compares the printed form
 * with the contents of a golden file.
 *
 * <p>When the {@code generate} flag is set, the printed plan is written to the golden file
 * and no comparison is made.
 *
 * @param pp physical plan to compile
 * @param expectedFile path of the golden file to compare with (or to write when generating)
 * @throws Exception if compilation, printing or file access fails
 */
private void run(PhysicalPlan pp, String expectedFile) throws Exception {
  MRCompiler comp = new MRCompiler(pp, pc);
  comp.compile();
  MROperPlan mrp = comp.getMRPlan();

  PlanPrinter ppp = new PlanPrinter(mrp);
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  ppp.print(baos);
  String compiledPlan = baos.toString();

  if (generate) {
    FileOutputStream fos = new FileOutputStream(expectedFile);
    try {
      fos.write(baos.toByteArray());
    } finally {
      // Always release the handle so the golden file is flushed and not leaked.
      fos.close();
    }
    return;
  }

  // Read the whole golden file: one read() call may return fewer bytes than are available,
  // and returns -1 for an empty file.
  ByteArrayOutputStream goldenBytes = new ByteArrayOutputStream();
  FileInputStream fis = new FileInputStream(expectedFile);
  try {
    byte[] buffer = new byte[8192];
    int read;
    while ((read = fis.read(buffer)) != -1) {
      goldenBytes.write(buffer, 0, read);
    }
  } finally {
    fis.close();
  }

  String goldenPlan = goldenBytes.toString();
  // Drop a single trailing newline; test on the string itself, since character and byte
  // counts can differ.
  if (goldenPlan.endsWith("\n")) {
    goldenPlan = goldenPlan.substring(0, goldenPlan.length() - 1);
  }

  pp.explain(System.out);
  System.out.println();
  System.out.println("<<<" + compiledPlan + ">>>");
  System.out.println("-------------");
  System.out.println("Golden");
  System.out.println("<<<" + goldenPlan + ">>>");
  System.out.println("-------------");
  assertEquals(goldenPlan, compiledPlan);
}
/**
 * Expects compilation of a {@code loadedFilter} plan ending in a split to fail with error
 * code 2025.
 */
@Test
public void testMRCompilerErr1() throws Exception {
  PhysicalPlan plan = new PhysicalPlan();
  PhysicalPlan loadFilter = GenPhyOp.loadedFilter();
  plan.merge(loadFilter);

  POSplit split = GenPhyOp.topSplitOp();
  plan.addAsLeaf(split);

  try {
    Util.buildMRPlan(plan, pc);
    fail("Expected failure.");
  } catch (MRCompilerException expected) {
    assertTrue(expected.getErrorCode() == 2025);
  }
}
/** * @param op * @param index * @param plan * @throws PlanException */ private static void setProjectInput(PhysicalOperator op, PhysicalPlan plan, int index) throws PlanException { String scope = op.getOperatorKey().scope; POProject proj = new POProject( new OperatorKey(scope, NodeIdGenerator.getGenerator().getNextNodeId(scope)), op.getRequestedParallelism(), index); proj.setResultType(DataType.BAG); // Remove old connections and elements from the plan plan.trimAbove(op); plan.add(proj); plan.connect(proj, op); List<PhysicalOperator> inputs = Lists.newArrayList(); inputs.add(proj); op.setInputs(inputs); }
/** * create new Local rearrange by cloning existing rearrange and add plan for projecting the key * * @param rearrange * @return * @throws PlanException * @throws CloneNotSupportedException */ private static POLocalRearrange getNewRearrange(POLocalRearrange rearrange) throws PlanException, CloneNotSupportedException { POLocalRearrange newRearrange = rearrange.clone(); // Set the projection to be the key PhysicalPlan newPlan = new PhysicalPlan(); String scope = newRearrange.getOperatorKey().scope; POProject proj = new POProject( new OperatorKey(scope, NodeIdGenerator.getGenerator().getNextNodeId(scope)), -1, 0); proj.setResultType(newRearrange.getKeyType()); newPlan.add(proj); List<PhysicalPlan> plans = new ArrayList<PhysicalPlan>(1); plans.add(newPlan); newRearrange.setPlansFromCombiner(plans); return newRearrange; }
/**
 * Compiles a {@code loadedFilter} plan followed by a distinct, a {@code grpChain}, a second
 * distinct and a store, and compares the result with golden file MRC16.gld.
 */
public void testDistinct1() throws Exception {
  PhysicalPlan plan = new PhysicalPlan();

  PhysicalPlan loadFilter = GenPhyOp.loadedFilter();
  plan.merge(loadFilter);

  PODistinct firstDistinct = new PODistinct(new OperatorKey("", r.nextLong()), -1, null);
  plan.addAsLeaf(firstDistinct);

  PhysicalPlan groupChain = GenPhyOp.grpChain();
  plan.merge(groupChain);
  plan.connect(firstDistinct, groupChain.getRoots().get(0));

  PODistinct secondDistinct = new PODistinct(new OperatorKey("", r.nextLong()), -1, null);
  plan.addAsLeaf(secondDistinct);

  POStore store = GenPhyOp.topStoreOp();
  plan.addAsLeaf(store);

  run(plan, "test/org/apache/pig/test/data/GoldenFiles/MRC16.gld");
}
/**
 * Compiles two {@code loadedGrpChain} sub-plans and one {@code loadedFilter} sub-plan that
 * all feed one union followed by a store, and compares the result with golden file MRC3.gld.
 */
@Test
public void testSim3() throws Exception {
  PhysicalPlan plan = new PhysicalPlan();

  PhysicalPlan firstGroupChain = GenPhyOp.loadedGrpChain();
  PhysicalPlan secondGroupChain = GenPhyOp.loadedGrpChain();
  plan.merge(firstGroupChain);
  plan.merge(secondGroupChain);

  POUnion union = GenPhyOp.topUnionOp();
  plan.addAsLeaf(union);

  // Third union input: a loaded filter merged in after the union exists.
  PhysicalPlan loadFilter = GenPhyOp.loadedFilter();
  plan.merge(loadFilter);
  plan.connect(loadFilter.getLeaves().get(0), union);

  POStore store = GenPhyOp.topStoreOp();
  plan.add(store);
  plan.connect(union, store);

  run(plan, "test/org/apache/pig/test/data/GoldenFiles/MRC3.gld");
}
/** * Will be called once all the intermediate keys and values are processed. So right place to * stop the reporter thread. */ @Override protected void cleanup(Context context) throws IOException, InterruptedException { super.cleanup(context); if (errorInReduce) { // there was an error in reduce - just return return; } if (PigMapReduce.sJobConfInternal.get().get("pig.stream.in.reduce", "false").equals("true")) { // If there is a stream in the pipeline we could // potentially have more to process - so lets // set the flag stating that all map input has been sent // already and then lets run the pipeline one more time // This will result in nothing happening in the case // where there is no stream in the pipeline rp.endOfAllInput = true; runPipeline(leaf); } if (!inIllustrator) { for (POStore store : stores) { if (!initialized) { MapReducePOStoreImpl impl = new MapReducePOStoreImpl(context); store.setStoreImpl(impl); store.setUp(); } store.tearDown(); } } // Calling EvalFunc.finish() UDFFinishVisitor finisher = new UDFFinishVisitor(rp, new DependencyOrderWalker<PhysicalOperator, PhysicalPlan>(rp)); try { finisher.visit(); } catch (VisitorException e) { throw new IOException("Error trying to finish UDFs", e); } PhysicalOperator.setReporter(null); initialized = false; }
/** * stolen from JobControlCompiler TODO: refactor it to share this * * @param physicalPlan * @param poLoad * @param jobConf * @return * @throws java.io.IOException */ private static JobConf configureLoader(PhysicalPlan physicalPlan, POLoad poLoad, JobConf jobConf) throws IOException { // 这部分似乎没用 Job job = new Job(jobConf); LoadFunc loadFunc = poLoad.getLoadFunc(); loadFunc.setLocation(poLoad.getLFile().getFileName(), job); // stolen from JobControlCompiler ArrayList<FileSpec> pigInputs = new ArrayList<FileSpec>(); // Store the inp filespecs pigInputs.add(poLoad.getLFile()); ArrayList<List<OperatorKey>> inpTargets = Lists.newArrayList(); ArrayList<String> inpSignatures = Lists.newArrayList(); ArrayList<Long> inpLimits = Lists.newArrayList(); // Store the target operators for tuples read // from this input List<PhysicalOperator> loadSuccessors = physicalPlan.getSuccessors(poLoad); List<OperatorKey> loadSuccessorsKeys = Lists.newArrayList(); if (loadSuccessors != null) { for (PhysicalOperator loadSuccessor : loadSuccessors) { loadSuccessorsKeys.add(loadSuccessor.getOperatorKey()); } } inpTargets.add(loadSuccessorsKeys); inpSignatures.add(poLoad.getSignature()); inpLimits.add(poLoad.getLimit()); jobConf.set("pig.inputs", ObjectSerializer.serialize(pigInputs)); jobConf.set("pig.inpTargets", ObjectSerializer.serialize(inpTargets)); jobConf.set("pig.inpSignatures", ObjectSerializer.serialize(inpSignatures)); jobConf.set("pig.inpLimits", ObjectSerializer.serialize(inpLimits)); return jobConf; }
/** * Look for a algebraic POUserFunc as successor to this project, called recursively to skip any * other projects seen on the way. * * @param proj project * @param pplan physical plan * @return null if any operator other POProject or algebraic POUserFunc is found while going down * the plan, otherwise algebraic POUserFunc is returned */ private static POUserFunc getAlgebraicSuccessor(POProject proj, PhysicalPlan pplan) { // check if root is followed by combinable operator List<PhysicalOperator> succs = pplan.getSuccessors(proj); if (succs == null || succs.size() == 0) { return null; } if (succs.size() > 1) { // project shared by more than one operator - does not happen in // plans generated today won't try to combine this return null; } PhysicalOperator succ = succs.get(0); if (succ instanceof POProject) { return getAlgebraicSuccessor((POProject) succ, pplan); } if (succ instanceof POUserFunc && ((POUserFunc) succ).combinable()) { return (POUserFunc) succ; } // some other operator ? can't combine return null; }
/**
 * Checks that a POSortedDistinct nested in a foreach is printed as POSortedDistinct, by
 * comparing the compiled plan with golden file MRC19.gld.
 *
 * @throws Exception if compilation or the comparison fails
 */
@Test
public void testSortedDistinctInForeach() throws Exception {
  PhysicalPlan plan = new PhysicalPlan();
  PhysicalPlan groupChain = GenPhyOp.loadedGrpChain();
  plan.merge(groupChain);

  // Inner plan of the foreach: a lone sorted distinct.
  List<PhysicalPlan> innerPlans = new LinkedList<PhysicalPlan>();
  PhysicalPlan innerPlan = new PhysicalPlan();
  PODistinct sortedDistinct = new POSortedDistinct(new OperatorKey("", r.nextLong()), -1, null);
  innerPlan.addAsLeaf(sortedDistinct);
  innerPlans.add(innerPlan);

  List<Boolean> flattenFlags = new ArrayList<Boolean>();
  flattenFlags.add(false);

  POForEach forEach =
      new POForEach(new OperatorKey("", r.nextLong()), 1, innerPlans, flattenFlags);
  plan.addAsLeaf(forEach);

  POStore store = GenPhyOp.topStoreOp();
  plan.addAsLeaf(store);

  run(plan, "test/org/apache/pig/test/data/GoldenFiles/MRC19.gld");
}
@Test public void testSortUDF1() throws Exception { PhysicalPlan php = new PhysicalPlan(); PhysicalPlan ldFil1 = GenPhyOp.loadedFilter(); php.merge(ldFil1); // set up order by * String funcName = WeirdComparator.class.getName(); POUserComparisonFunc comparator = new POUserComparisonFunc( new OperatorKey("", r.nextLong()), -1, null, new FuncSpec(funcName)); POSort sort = new POSort( new OperatorKey("", r.nextLong()), -1, ldFil1.getLeaves(), null, new ArrayList<Boolean>(), comparator); sort.setRequestedParallelism(20); PhysicalPlan nesSortPlan = new PhysicalPlan(); POProject topPrj = new POProject(new OperatorKey("", r.nextLong())); topPrj.setColumn(1); topPrj.setOverloaded(true); topPrj.setResultType(DataType.TUPLE); nesSortPlan.add(topPrj); POProject prjStar2 = new POProject(new OperatorKey("", r.nextLong())); prjStar2.setResultType(DataType.TUPLE); prjStar2.setStar(true); nesSortPlan.add(prjStar2); nesSortPlan.connect(topPrj, prjStar2); List<PhysicalPlan> nesSortPlanLst = new ArrayList<PhysicalPlan>(); nesSortPlanLst.add(nesSortPlan); sort.setSortPlans(nesSortPlanLst); php.add(sort); php.connect(ldFil1.getLeaves().get(0), sort); // have a foreach which takes the sort output // and send it two two udfs List<String> udfs = new ArrayList<String>(); udfs.add(COUNT.class.getName()); udfs.add(SUM.class.getName()); POForEach fe3 = GenPhyOp.topForEachOPWithUDF(udfs); php.add(fe3); php.connect(sort, fe3); // add a group above the foreach PhysicalPlan grpChain1 = GenPhyOp.grpChain(); php.merge(grpChain1); php.connect(fe3, grpChain1.getRoots().get(0)); udfs.clear(); udfs.add(AVG.class.getName()); POForEach fe4 = GenPhyOp.topForEachOPWithUDF(udfs); php.addAsLeaf(fe4); PhysicalPlan grpChain2 = GenPhyOp.grpChain(); php.merge(grpChain2); php.connect(fe4, grpChain2.getRoots().get(0)); udfs.clear(); udfs.add(GFCross.class.getName() + "('1')"); POForEach fe5 = GenPhyOp.topForEachOPWithUDF(udfs); php.addAsLeaf(fe5); POStore st = GenPhyOp.topStoreOp(); php.addAsLeaf(st); run(php, 
"test/org/apache/pig/test/data/GoldenFiles/MRC15.gld"); }
/**
 * Compiles a plan in which two {@code loadedFilter} sub-plans are connected to a global
 * rearrange that was added first, followed by a package and a store, and compares the result
 * with golden file MRC9.gld.
 */
@Test
public void testSim9() throws Exception {
  PhysicalPlan plan = new PhysicalPlan();

  // The global rearrange goes in first; its inputs are attached afterwards.
  POGlobalRearrange globalRearrange = GenPhyOp.topGlobalRearrangeOp();
  plan.addAsLeaf(globalRearrange);

  PhysicalPlan firstBranch = GenPhyOp.loadedFilter();
  PhysicalPlan secondBranch = GenPhyOp.loadedFilter();

  plan.merge(firstBranch);
  plan.connect(firstBranch.getLeaves().get(0), globalRearrange);

  plan.merge(secondBranch);
  plan.connect(secondBranch.getLeaves().get(0), globalRearrange);

  POPackage pkg = GenPhyOp.topPackageOp();
  plan.addAsLeaf(pkg);

  POStore store = GenPhyOp.topStoreOp();
  plan.addAsLeaf(store);

  run(plan, "test/org/apache/pig/test/data/GoldenFiles/MRC9.gld");
}
// Tests Single input case for both blocking and non-blocking // with both map and reduce phases @Test public void testSim1() throws Exception { PhysicalPlan php = new PhysicalPlan(); POLoad ld = GenPhyOp.topLoadOp(); php.add(ld); PhysicalPlan grpChain1 = GenPhyOp.grpChain(); php.merge(grpChain1); php.connect(ld, grpChain1.getRoots().get(0)); PhysicalOperator leaf = php.getLeaves().get(0); PhysicalPlan grpChain2 = GenPhyOp.grpChain(); php.merge(grpChain2); php.connect(leaf, grpChain2.getRoots().get(0)); leaf = php.getLeaves().get(0); POFilter fl = GenPhyOp.topFilterOp(); php.add(fl); php.connect(leaf, fl); POStore st = GenPhyOp.topStoreOp(); php.add(st); php.connect(fl, st); run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC1.gld"); }
/**
 * Compiles a plan with nested splits and differing requested parallelism on its operators,
 * and compares the result with golden file MRC14.gld.
 *
 * <p>Shape: load -> split -> two filters -> one split per filter -> three filters -> three
 * local rearranges -> global rearrange -> package -> split -> two filters -> union -> store.
 */
@Test
public void testSpl3() throws Exception {
  PhysicalPlan php = new PhysicalPlan();

  // Load feeding the first split.
  POLoad lA = GenPhyOp.topLoadOp();
  POSplit spl = GenPhyOp.topSplitOp();
  php.add(lA);
  php.add(spl);
  php.connect(lA, spl);

  // First level: two filters hanging off the split.
  POFilter fl1 = GenPhyOp.topFilterOp();
  fl1.setRequestedParallelism(10);
  POFilter fl2 = GenPhyOp.topFilterOp();
  fl2.setRequestedParallelism(20);
  php.add(fl1);
  php.add(fl2);
  php.connect(spl, fl1);
  php.connect(spl, fl2);

  // Second level: each filter feeds its own split.
  POSplit sp11 = GenPhyOp.topSplitOp();
  POSplit sp21 = GenPhyOp.topSplitOp();
  php.add(sp11);
  php.add(sp21);
  php.connect(fl1, sp11);
  php.connect(fl2, sp21);

  // sp11 fans out to one filter, sp21 to two.
  POFilter fl11 = GenPhyOp.topFilterOp();
  fl11.setRequestedParallelism(10);
  POFilter fl21 = GenPhyOp.topFilterOp();
  fl21.setRequestedParallelism(20);
  POFilter fl22 = GenPhyOp.topFilterOp();
  fl22.setRequestedParallelism(30);
  php.add(fl11);
  php.add(fl21);
  php.add(fl22);
  php.connect(sp11, fl11);
  php.connect(sp21, fl21);
  php.connect(sp21, fl22);

  // One local rearrange per second-level filter.
  POLocalRearrange lr1 = GenPhyOp.topLocalRearrangeOp();
  lr1.setRequestedParallelism(40);
  POLocalRearrange lr21 = GenPhyOp.topLocalRearrangeOp();
  lr21.setRequestedParallelism(15);
  POLocalRearrange lr22 = GenPhyOp.topLocalRearrangeOp();
  lr22.setRequestedParallelism(35);
  php.add(lr1);
  php.add(lr21);
  php.add(lr22);
  php.connect(fl11, lr1);
  php.connect(fl21, lr21);
  php.connect(fl22, lr22);

  // Global rearrange and package appended as leaves of the plan.
  POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
  php.addAsLeaf(gr);
  POPackage pk = GenPhyOp.topPackageOp();
  pk.setRequestedParallelism(25);
  php.addAsLeaf(pk);

  // Split after the package, fanning out to two more filters.
  POSplit sp2 = GenPhyOp.topSplitOp();
  php.addAsLeaf(sp2);
  POFilter fl3 = GenPhyOp.topFilterOp();
  fl3.setRequestedParallelism(100);
  POFilter fl4 = GenPhyOp.topFilterOp();
  fl4.setRequestedParallelism(80);
  php.add(fl3);
  php.add(fl4);
  php.connect(sp2, fl3);
  php.connect(sp2, fl4);

  // Union and store appended as leaves.
  POUnion un = GenPhyOp.topUnionOp();
  php.addAsLeaf(un);
  POStore st = GenPhyOp.topStoreOp();
  php.addAsLeaf(st);

  run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC14.gld");
}
/**
 * Compiles a plan with a split before and a split after a group, and compares the result
 * with golden file MRC13.gld.
 *
 * <p>Shape: load -> split -> two filters -> two local rearranges -> global rearrange ->
 * package -> split -> two filters -> union -> store.
 */
@Test
public void testSpl2() throws Exception {
  PhysicalPlan php = new PhysicalPlan();

  // Load feeding the first split.
  POLoad lA = GenPhyOp.topLoadOp();
  POSplit spl = GenPhyOp.topSplitOp();
  php.add(lA);
  php.add(spl);
  php.connect(lA, spl);

  // Two filters hanging off the split.
  POFilter fl1 = GenPhyOp.topFilterOp();
  POFilter fl2 = GenPhyOp.topFilterOp();
  php.add(fl1);
  php.add(fl2);
  php.connect(spl, fl1);
  php.connect(spl, fl2);

  // One local rearrange per filter.
  POLocalRearrange lr1 = GenPhyOp.topLocalRearrangeOp();
  POLocalRearrange lr2 = GenPhyOp.topLocalRearrangeOp();
  php.add(lr1);
  php.add(lr2);
  php.connect(fl1, lr1);
  php.connect(fl2, lr2);

  // Global rearrange and package appended as leaves of the plan.
  POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
  php.addAsLeaf(gr);
  POPackage pk = GenPhyOp.topPackageOp();
  php.addAsLeaf(pk);

  // Split after the package, fanning out to two more filters.
  POSplit sp2 = GenPhyOp.topSplitOp();
  php.addAsLeaf(sp2);
  POFilter fl3 = GenPhyOp.topFilterOp();
  POFilter fl4 = GenPhyOp.topFilterOp();
  php.add(fl3);
  php.add(fl4);
  php.connect(sp2, fl3);
  php.connect(sp2, fl4);

  // Union and store appended as leaves.
  POUnion un = GenPhyOp.topUnionOp();
  php.addAsLeaf(un);
  POStore st = GenPhyOp.topStoreOp();
  php.addAsLeaf(st);

  run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC13.gld");
}
@Test public void testRun2() throws Exception { PhysicalPlan php = new PhysicalPlan(); PhysicalPlan part1 = new PhysicalPlan(); POLoad lC = GenPhyOp.topLoadOp(); POFilter fC = GenPhyOp.topFilterOp(); POLocalRearrange lrC = GenPhyOp.topLocalRearrangeOp(); POGlobalRearrange grC = GenPhyOp.topGlobalRearrangeOp(); POPackage pkC = GenPhyOp.topPackageOp(); part1.add(lC); part1.add(fC); part1.connect(lC, fC); part1.add(lrC); part1.connect(fC, lrC); part1.add(grC); part1.connect(lrC, grC); part1.add(pkC); part1.connect(grC, pkC); POPackage pkD = GenPhyOp.topPackageOp(); POLocalRearrange lrD = GenPhyOp.topLocalRearrangeOp(); POGlobalRearrange grD = GenPhyOp.topGlobalRearrangeOp(); POLoad lD = GenPhyOp.topLoadOp(); part1.add(lD); part1.add(lrD); part1.connect(lD, lrD); part1.add(grD); part1.connect(lrD, grD); part1.add(pkD); part1.connect(grD, pkD); part1.connect(pkD, grC); POLoad lA = GenPhyOp.topLoadOp(); POLoad lB = GenPhyOp.topLoadOp(); // POLoad lC = lA; POFilter fA = GenPhyOp.topFilterOp(); POLocalRearrange lrA = GenPhyOp.topLocalRearrangeOp(); POLocalRearrange lrB = GenPhyOp.topLocalRearrangeOp(); POGlobalRearrange grAB = GenPhyOp.topGlobalRearrangeOp(); POPackage pkAB = GenPhyOp.topPackageOp(); POFilter fAB = GenPhyOp.topFilterOp(); POUnion unABC = GenPhyOp.topUnionOp(); php.add(lA); php.add(lB); php.add(fA); php.connect(lA, fA); php.add(lrA); php.add(lrB); php.connect(fA, lrA); php.connect(lB, lrB); php.add(grAB); php.connect(lrA, grAB); php.connect(lrB, grAB); php.add(pkAB); php.connect(grAB, pkAB); php.add(fAB); php.connect(pkAB, fAB); php.merge(part1); List<PhysicalOperator> leaves = new ArrayList<PhysicalOperator>(); for (PhysicalOperator phyOp : php.getLeaves()) { leaves.add(phyOp); } php.add(unABC); for (PhysicalOperator physicalOperator : leaves) { php.connect(physicalOperator, unABC); } POStore st = GenPhyOp.topStoreOp(); php.add(st); php.connect(unABC, st); run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC11.gld"); }