/**
 * Merges two schemas whose second field is a tuple in both, but whose nested members disagree
 * in type ({@code b12} is a tuple in one schema and {@code b22} is a long in the other).
 *
 * <p>Expectations encoded below: Union mode yields a schema whose field 1 carries an empty
 * nested schema; LoadForEach mode throws a {@code FrontendException} with error code 1031 in
 * both argument orders.
 */
@Test
public void testNewMergeMismatchType2() throws Throwable {
    LogicalSchema a =
            org.apache.pig.newplan.logical.Util.translateSchema(
                    Utils.getSchemaFromString(
                            "a1:chararray, b1:(b11:double, b12:(b121:int)), c1:long"));
    LogicalSchema b =
            org.apache.pig.newplan.logical.Util.translateSchema(
                    Utils.getSchemaFromString("a2:bytearray, b2:(b21:double, b22:long), c2:int"));

    LogicalSchema mergedSchema = LogicalSchema.merge(a, b, LogicalSchema.MergeMode.Union);
    LogicalSchema expected =
            org.apache.pig.newplan.logical.Util.translateSchema(
                    Utils.getSchemaFromString("a1:chararray, b1:(), c1:long"));
    // The expected tuple field is given an explicit empty schema before comparing.
    expected.getField(1).schema = new LogicalSchema();
    Assert.assertTrue(LogicalSchema.equals(mergedSchema, expected, false, false));

    try {
        LogicalSchema.merge(a, b, LogicalSchema.MergeMode.LoadForEach);
        Assert.fail();
    } catch (FrontendException e) {
        // assertEquals reports the actual error code on failure, unlike assertTrue(x == y).
        Assert.assertEquals(1031, e.getErrorCode());
    }

    try {
        LogicalSchema.merge(b, a, LogicalSchema.MergeMode.LoadForEach);
        Assert.fail();
    } catch (FrontendException e) {
        Assert.assertEquals(1031, e.getErrorCode());
    }
}
/**
 * Merges a schema with two nested tuple fields against a schema whose first field is a plain
 * bytearray, and checks the result for Union mode and for LoadForEach mode in both argument
 * orders.
 */
@Test
public void testNewNormalNestedMerge2() throws Exception {
    final String leftSpec =
            "a1:(a11:chararray, a12:float), b1:(b11:chararray, b12:float), c1:long";
    final String rightSpec = "a2:bytearray, b2:(b21:double, b22:long), c2:chararray";

    LogicalSchema left =
            org.apache.pig.newplan.logical.Util.translateSchema(
                    Utils.getSchemaFromString(leftSpec));
    LogicalSchema right =
            org.apache.pig.newplan.logical.Util.translateSchema(
                    Utils.getSchemaFromString(rightSpec));

    // Union: field 1 is expected to carry an empty nested schema.
    LogicalSchema unionResult = LogicalSchema.merge(left, right, LogicalSchema.MergeMode.Union);
    LogicalSchema unionExpected =
            org.apache.pig.newplan.logical.Util.translateSchema(
                    Utils.getSchemaFromString(
                            "a1:(a11:chararray, a12:float), b1:(), c1:bytearray"));
    unionExpected.getField(1).schema = new LogicalSchema();
    Assert.assertTrue(LogicalSchema.equals(unionResult, unionExpected, false, false));

    // LoadForEach, left first: the expected result is the left schema itself.
    LogicalSchema forwardResult =
            LogicalSchema.merge(left, right, LogicalSchema.MergeMode.LoadForEach);
    LogicalSchema forwardExpected =
            org.apache.pig.newplan.logical.Util.translateSchema(
                    Utils.getSchemaFromString(
                            "a1:(a11:chararray, a12:float), b1:(b11:chararray, b12:float), c1:long"));
    Assert.assertTrue(LogicalSchema.equals(forwardResult, forwardExpected, false, false));

    // LoadForEach, right first.
    LogicalSchema reverseResult =
            LogicalSchema.merge(right, left, LogicalSchema.MergeMode.LoadForEach);
    LogicalSchema reverseExpected =
            org.apache.pig.newplan.logical.Util.translateSchema(
                    Utils.getSchemaFromString(
                            "a2:(a11:chararray, a12:float), b2:(b21:double, b22:long), c2:chararray"));
    Assert.assertTrue(LogicalSchema.equals(reverseResult, reverseExpected, false, false));
}
@Test // See PIG-730 public void testMergeSchemaWithTwoLevelAccess() throws Exception { // Generate two schemas Schema s1 = Utils.getSchemaFromString("a:{t:(a0:int, a1:int)}"); Schema s2 = Utils.getSchemaFromString("b:{t:(b0:int, b1:int)}"); s1.getField(0).schema.setTwoLevelAccessRequired(true); s1.getField(0).schema.setTwoLevelAccessRequired(false); Schema s3 = Schema.mergeSchema(s1, s2, true); Assert.assertEquals(s3, s2); }
@Test // See PIG-730 public void testMergeSchemaWithTwoLevelAccess3() throws Exception { // Generate two schemas LogicalSchema ls1 = Utils.parseSchema("a:{t:(a0:int, a1:int)}"); LogicalSchema ls2 = Utils.parseSchema("b:{t:(b0:int, b1:int)}"); LogicalSchema ls3 = LogicalSchema.merge(ls1, ls2, MergeMode.LoadForEach); Assert.assertTrue( org.apache.pig.newplan.logical.Util.translateSchema(ls3) .toString() .equals("{a: {(a0: int,a1: int)}}")); }
/**
 * Merges a three-field schema with a two-field schema.
 *
 * <p>Expectations encoded below: Union mode returns {@code null}; LoadForEach mode throws a
 * {@code FrontendException} with error code 1031.
 */
@Test
public void testNewMergeDifferentSize1() throws Throwable {
    LogicalSchema a = Utils.parseSchema("a1:bytearray, b1:long, c1:long");
    LogicalSchema b = Utils.parseSchema("a2:bytearray, b2:long");

    LogicalSchema mergedSchema = LogicalSchema.merge(a, b, LogicalSchema.MergeMode.Union);
    // assertNull prints the unexpected value on failure, unlike assertTrue(x == null).
    Assert.assertNull(mergedSchema);

    try {
        LogicalSchema.merge(a, b, LogicalSchema.MergeMode.LoadForEach);
        Assert.fail();
    } catch (FrontendException e) {
        // assertEquals reports the actual error code on failure.
        Assert.assertEquals(1031, e.getErrorCode());
    }
}
/**
 * Merges a schema whose tuple field has nested members with a schema whose tuple field is
 * declared as {@code ()}, and checks the result for Union mode and for LoadForEach mode in
 * both argument orders.
 */
@Test
public void testNewMergeNullSchemas() throws Throwable {
    LogicalSchema detailed = Utils.parseSchema("a1:bytearray, b1:(b11:int, b12:float), c1:long");
    LogicalSchema bare = Utils.parseSchema("a2:bytearray, b2:(), c2:int");

    // Union
    LogicalSchema unionResult =
            LogicalSchema.merge(detailed, bare, LogicalSchema.MergeMode.Union);
    LogicalSchema unionExpected = Utils.parseSchema("a1:bytearray, b1:(), c1:long");
    Assert.assertTrue(LogicalSchema.equals(unionResult, unionExpected, false, false));

    // LoadForEach, detailed schema first
    LogicalSchema forwardResult =
            LogicalSchema.merge(detailed, bare, LogicalSchema.MergeMode.LoadForEach);
    LogicalSchema forwardExpected =
            Utils.parseSchema("a1:bytearray, b1:(b11:int, b12:float), c1:long");
    Assert.assertTrue(LogicalSchema.equals(forwardResult, forwardExpected, false, false));

    // LoadForEach, bare schema first
    LogicalSchema reverseResult =
            LogicalSchema.merge(bare, detailed, LogicalSchema.MergeMode.LoadForEach);
    LogicalSchema reverseExpected =
            Utils.parseSchema("a2:bytearray, b2:(b11:int,b12:float), c2:int");
    Assert.assertTrue(LogicalSchema.equals(reverseResult, reverseExpected, false, false));
}
/** Configures the Reduce plan, the POPackage operator and the reporter thread */ @SuppressWarnings("unchecked") @Override protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); inIllustrator = inIllustrator(context); if (inIllustrator) pack = getPack(context); Configuration jConf = context.getConfiguration(); SpillableMemoryManager.configure(ConfigurationUtil.toProperties(jConf)); context .getConfiguration() .set( PigConstants.TASK_INDEX, Integer.toString(context.getTaskAttemptID().getTaskID().getId())); sJobContext = context; sJobConfInternal.set(context.getConfiguration()); sJobConf = context.getConfiguration(); try { PigContext.setPackageImportList( (ArrayList<String>) ObjectSerializer.deserialize(jConf.get("udf.import.list"))); pigContext = (PigContext) ObjectSerializer.deserialize(jConf.get("pig.pigContext")); // This attempts to fetch all of the generated code from the distributed cache, and resolve // it SchemaTupleBackend.initialize(jConf, pigContext); if (rp == null) rp = (PhysicalPlan) ObjectSerializer.deserialize(jConf.get("pig.reducePlan")); stores = PlanHelper.getPhysicalOperators(rp, POStore.class); if (!inIllustrator) pack = (POPackage) ObjectSerializer.deserialize(jConf.get("pig.reduce.package")); // To be removed if (rp.isEmpty()) log.debug("Reduce Plan empty!"); else { ByteArrayOutputStream baos = new ByteArrayOutputStream(); rp.explain(baos); log.debug(baos.toString()); } pigReporter = new ProgressableReporter(); if (!(rp.isEmpty())) { roots = rp.getRoots().toArray(new PhysicalOperator[1]); leaf = rp.getLeaves().get(0); } // Get the UDF specific context MapRedUtil.setupUDFContext(jConf); } catch (IOException ioe) { String msg = "Problem while configuring reduce plan."; throw new RuntimeException(msg, ioe); } log.info( "Aliases being processed per job phase (AliasName[line,offset]): " + jConf.get("pig.alias.location")); Utils.setDefaultTimeZone(PigMapReduce.sJobConfInternal.get()); }
protected void doAllPredecessors(O node, Set<O> seen, Collection<O> fifo) throws VisitorException { if (!seen.contains(node)) { // We haven't seen this one before. Collection<O> preds = Utils.mergeCollection(mPlan.getPredecessors(node), mPlan.getSoftLinkPredecessors(node)); if (preds != null && preds.size() > 0) { // Do all our predecessors before ourself for (O op : preds) { doAllPredecessors(op, seen, fifo); } } // Now do ourself seen.add(node); fifo.add(node); } }
// PIG-2146 @Test public void testSchemaInStoreForDistinctLimit() throws Exception { // test if the POStore in the 2nd mr plan (that stores the actual output) // has a schema String query = "a = load 'input1' as (a : int,b :float ,c : int);" + "b = distinct a;" + "c = limit b 10;" + "store c into 'output';"; PhysicalPlan pp = Util.buildPp(pigServer, query); MROperPlan mrPlan = Util.buildMRPlan(pp, pc); MapReduceOper secondMrOper = mrPlan.getLeaves().get(0); POStore store = (POStore) secondMrOper.reducePlan.getLeaves().get(0); assertEquals( "compare load and store schema", store.getSchema(), Utils.getSchemaFromString("a : int,b :float ,c : int")); }
public void prepareToWrite(RecordWriter writer) { // Get the schema string from the UDFContext object. UDFContext udfc = UDFContext.getUDFContext(); Properties p = udfc.getUDFProperties(this.getClass(), new String[] {udfContextSignature}); String strSchema = p.getProperty(SCHEMA_SIGNATURE); if (strSchema != null) { // Parse the schema from the string stored in the properties object. try { schema = new ResourceSchema(Utils.getSchemaFromString(strSchema)); } catch (ParserException pex) { logger.warn("Could not parse schema for storing."); } } if (headerTreatment == Headers.DEFAULT) { headerTreatment = Headers.SKIP_OUTPUT_HEADER; } // PigStorage's prepareToWrite() super.prepareToWrite(writer); }
/**
 * Returns the schema for {@code location} by delegating to
 * {@code Utils.getSchema(this, location, true, job)}.
 *
 * @param location the location whose schema is requested
 * @param job the job passed through to the helper
 * @return whatever the helper returns
 * @throws IOException propagated from the helper
 */
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
    // NOTE(review): the meaning of the literal `true` is defined by Utils.getSchema, which is
    // not visible here -- confirm before changing it.
    ResourceSchema resolved = Utils.getSchema(this, location, true, job);
    return resolved;
}