@Override public Result getNextTuple() throws ExecException { res = super.getNextTuple(); if (writer == null) { // In the case of combiner return res; } try { switch (res.returnStatus) { case POStatus.STATUS_OK: if (illustrator == null) { Tuple result = (Tuple) res.result; Byte index = (Byte) result.get(0); PigNullableWritable key = HDataType.getWritableComparableTypes(result.get(1), keyType); NullableTuple val = new NullableTuple((Tuple) result.get(2)); // Both the key and the value need the index. The key needs it so // that it can be sorted on the index in addition to the key // value. The value needs it so that POPackage can properly // assign the tuple to its slot in the projection. key.setIndex(index); val.setIndex(index); if (isSkewedJoin) { // Wrap into a NullablePartitionWritable to match the key // of the right table from POPartitionRearrangeTez for the skewed join NullablePartitionWritable wrappedKey = new NullablePartitionWritable(key); wrappedKey.setPartition(-1); key = wrappedKey; } writer.write(key, val); } else { illustratorMarkup(res.result, res.result, 0); } res = RESULT_EMPTY; break; case POStatus.STATUS_EOP: case POStatus.STATUS_ERR: case POStatus.STATUS_NULL: default: break; } } catch (IOException ioe) { int errCode = 2135; String msg = "Received error from POLocalRearrage function." + ioe.getMessage(); throw new ExecException(msg, errCode, ioe); } return inp; }
@Override public void collect(Context oc, Tuple tuple) throws InterruptedException, IOException { Byte index = (Byte) tuple.get(0); PigNullableWritable key = HDataType.getWritableComparableTypes(tuple.get(1), keyType); NullableTuple val = new NullableTuple((Tuple) tuple.get(2)); // Both the key and the value need the index. The key needs it so // that it can be sorted on the index in addition to the key // value. The value needs it so that POPackage can properly // assign the tuple to its slot in the projection. key.setIndex(index); val.setIndex(index); oc.write(key, val); }
@Override public void collect(Context oc, Tuple tuple) throws InterruptedException, IOException { Byte tupleKeyIdx = 2; Byte tupleValIdx = 3; Byte index = (Byte) tuple.get(0); Integer partitionIndex = -1; // for partitioning table, the partition index isn't present if (tuple.size() == 3) { // super.collect(oc, tuple); // return; tupleKeyIdx--; tupleValIdx--; } else { partitionIndex = (Integer) tuple.get(1); } PigNullableWritable key = HDataType.getWritableComparableTypes(tuple.get(tupleKeyIdx), keyType); NullablePartitionWritable wrappedKey = new NullablePartitionWritable(key); NullableTuple val = new NullableTuple((Tuple) tuple.get(tupleValIdx)); // Both the key and the value need the index. The key needs it so // that it can be sorted on the index in addition to the key // value. The value needs it so that POPackage can properly // assign the tuple to its slot in the projection. wrappedKey.setIndex(index); // set the partition wrappedKey.setPartition(partitionIndex); val.setIndex(index); oc.write(wrappedKey, val); }
private void runTest(Object key, boolean inner[], byte keyType) throws ExecException, IOException { Random r = new Random(); DataBag db1 = GenRandomData.genRandSmallTupDataBag(r, 10, 100); DataBag db2 = GenRandomData.genRandSmallTupDataBag(r, 10, 100); List<NullableTuple> db = new ArrayList<NullableTuple>(200); Iterator<Tuple> db1Iter = db1.iterator(); if (!inner[0]) { while (db1Iter.hasNext()) { NullableTuple it = new NullableTuple(db1Iter.next()); it.setIndex((byte) 0); db.add(it); } } Iterator<Tuple> db2Iter = db2.iterator(); while (db2Iter.hasNext()) { NullableTuple it = new NullableTuple(db2Iter.next()); it.setIndex((byte) 1); db.add(it); } // ITIterator iti = new TestPackage.ITIterator(db.iterator()); POPackage pop = new POPackage(new OperatorKey("", r.nextLong())); pop.setNumInps(2); pop.getPkgr().setInner(inner); PigNullableWritable k = HDataType.getWritableComparableTypes(key, keyType); pop.attachInput(k, db.iterator()); if (keyType != DataType.BAG) { // test serialization NullablePartitionWritable wr; if (keyType == DataType.TUPLE) { BinSedesTuple tup = (BinSedesTuple) binfactory.newTupleNoCopy(((Tuple) k.getValueAsPigType()).getAll()); wr = new NullablePartitionWritable(new NullableTuple(tup)); } else { wr = new NullablePartitionWritable(k); } ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataOutputStream out = new DataOutputStream(baos); wr.write(out); byte[] arr = baos.toByteArray(); ByteArrayInputStream bais = new ByteArrayInputStream(arr); DataInputStream in = new DataInputStream(bais); NullablePartitionWritable re = new NullablePartitionWritable(); re.readFields(in); assertEquals(re, wr); } // we are not doing any optimization to remove // parts of the "value" which are present in the "key" in this // unit test - so set up the "keyInfo" accordingly in // the POPackage Map<Integer, Pair<Boolean, Map<Integer, Integer>>> keyInfo = new HashMap<Integer, Pair<Boolean, Map<Integer, Integer>>>(); Pair<Boolean, Map<Integer, Integer>> p = new Pair<Boolean, Map<Integer, Integer>>(false, new HashMap<Integer, Integer>()); keyInfo.put(0, p); keyInfo.put(1, p); pop.getPkgr().setKeyInfo(keyInfo); Tuple t = null; Result res = null; res = pop.getNextTuple(); if (res.returnStatus == POStatus.STATUS_NULL && inner[0]) return; assertEquals(POStatus.STATUS_OK, res.returnStatus); t = (Tuple) res.result; Object outKey = t.get(0); DataBag outDb1 = (DataBag) t.get(1); DataBag outDb2 = (DataBag) t.get(2); assertEquals(key, outKey); assertTrue(TestHelper.compareBags(db1, outDb1)); assertTrue(TestHelper.compareBags(db2, outDb2)); }
/** * The reduce function which packages the key and List<Tuple> into key, Bag<Tuple> * after converting Hadoop type key into Pig type. The package result is either collected as is, * if the reduce plan is empty or after passing through the reduce plan. */ @Override protected void reduce(PigNullableWritable key, Iterable<NullableTuple> tupIter, Context context) throws IOException, InterruptedException { if (!initialized) { initialized = true; // cache the collector for use in runPipeline() // which could additionally be called from close() this.outputCollector = context; pigReporter.setRep(context); PhysicalOperator.setReporter(pigReporter); boolean aggregateWarning = "true".equalsIgnoreCase(pigContext.getProperties().getProperty("aggregate.warning")); PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance(); pigStatusReporter.setContext(new MRTaskContext(context)); PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance(); pigHadoopLogger.setReporter(pigStatusReporter); pigHadoopLogger.setAggregate(aggregateWarning); PhysicalOperator.setPigLogger(pigHadoopLogger); for (POStore store : stores) { MapReducePOStoreImpl impl = new MapReducePOStoreImpl(context); store.setStoreImpl(impl); store.setUp(); } } // If the keyType is not a tuple, the MapWithComparator.collect() // would have wrapped the key into a tuple so that the // comparison UDF used in the order by can process it. // We need to unwrap the key out of the tuple and hand it // to the POPackage for processing if (keyType != DataType.TUPLE) { Tuple t = (Tuple) (key.getValueAsPigType()); try { key = HDataType.getWritableComparableTypes(t.get(0), keyType); } catch (ExecException e) { throw e; } } pack.attachInput(key, tupIter.iterator()); Result res = pack.getNextTuple(); if (res.returnStatus == POStatus.STATUS_OK) { Tuple packRes = (Tuple) res.result; if (rp.isEmpty()) { context.write(null, packRes); return; } rp.attachInput(packRes); List<PhysicalOperator> leaves = rp.getLeaves(); PhysicalOperator leaf = leaves.get(0); runPipeline(leaf); } if (res.returnStatus == POStatus.STATUS_NULL) { return; } if (res.returnStatus == POStatus.STATUS_ERR) { int errCode = 2093; String msg = "Encountered error in package operator while processing group."; throw new ExecException(msg, errCode, PigException.BUG); } }