/**
 * Checks that a skewed join and a default join over the same inputs return
 * the same, non-empty set of tuples when the server is built from the cluster
 * properties alone.
 *
 * <p>The shared {@code pigServer} field is replaced with a fresh instance, so
 * the {@code pig.skewedjoin.*} overrides applied in the constructor are not
 * set on the server used here.
 *
 * @throws IOException if loading either input relation fails
 */
public void testSkewedJoinWithNoProperties() throws IOException {
    pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());

    pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");

    try {
        DataBag skewedRows = BagFactory.getInstance().newDefaultBag();
        DataBag defaultRows = BagFactory.getInstance().newDefaultBag();

        pigServer.registerQuery(
                "C = join A by (id, name), B by (id, name) using \"skewed\" parallel 5;");
        drainAlias("C", skewedRows);

        pigServer.registerQuery("E = join A by(id, name), B by (id, name);");
        drainAlias("E", defaultRows);

        // Separate assertions so a failure names the join that came back empty.
        Assert.assertTrue("skewed join returned no tuples", skewedRows.size() > 0);
        Assert.assertTrue("default join returned no tuples", defaultRows.size() > 0);
        Assert.assertTrue("skewed join and default join results differ",
                TestHelper.compareBags(skewedRows, defaultRows));
    } catch (Exception e) {
        // Keep the original exception as the cause so its type and stack trace
        // appear in the test report; e.getMessage() alone may even be null.
        AssertionError failure = new AssertionError("Skewed join comparison failed: " + e);
        failure.initCause(e);
        throw failure;
    }
}

/** Appends every tuple produced for {@code alias} to {@code sink}. */
private void drainAlias(String alias, DataBag sink) throws IOException {
    Iterator<Tuple> rows = pigServer.openIterator(alias);
    while (rows.hasNext()) {
        sink.add(rows.next());
    }
}
public TestSkewedJoin() throws ExecException, IOException { pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); // pigServer = new PigServer(ExecType.LOCAL); pigServer.getPigContext().getProperties().setProperty("pig.skewedjoin.reduce.maxtuple", "5"); pigServer.getPigContext().getProperties().setProperty("pig.skewedjoin.reduce.memusage", "0.01"); }