@Override
public int getPartition(PigNullableWritable wrappedKey, Writable value,
        int numPartitions) {
    // for streaming tables, return the precomputed partition index blindly
    if (wrappedKey instanceof NullablePartitionWritable
            && ((NullablePartitionWritable) wrappedKey).getPartition() != -1) {
        return ((NullablePartitionWritable) wrappedKey).getPartition();
    }

    // for the partitioned table, compute the index based on the sampler output
    Pair<Integer, Integer> indexes;
    Integer curIndex = -1;
    Tuple keyTuple = TupleFactory.getInstance().newTuple(1);

    // extract the key from the NullablePartitionWritable
    PigNullableWritable key = ((NullablePartitionWritable) wrappedKey).getKey();
    try {
        keyTuple.set(0, key.getValueAsPigType());
    } catch (ExecException e) {
        return -1;
    }

    // if the key is a non-null tuple, use it directly as the lookup key
    if (key instanceof NullableTuple && key.getValueAsPigType() != null) {
        keyTuple = (Tuple) key.getValueAsPigType();
    }

    // if the partition file is empty, fall back to numPartitions
    totalReducers = (totalReducers > 0) ? totalReducers : numPartitions;

    indexes = reducerMap.get(keyTuple);
    // if the reducerMap does not contain the key, do default hash-based partitioning
    if (indexes == null) {
        return Math.abs(keyTuple.hashCode() % totalReducers);
    }

    if (currentIndexMap.containsKey(keyTuple)) {
        curIndex = currentIndexMap.get(keyTuple);
    }

    // round-robin across the reducer range reserved for this skewed key
    if (curIndex >= (indexes.first + indexes.second) || curIndex == -1) {
        curIndex = indexes.first;
    } else {
        curIndex++;
    }

    // remember the last index handed out for this key
    currentIndexMap.put(keyTuple, curIndex);
    return curIndex % totalReducers;
}
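To make the round-robin concrete: for each skewed key the sampler reserves the reducer range [indexes.first, indexes.first + indexes.second], and successive tuples with that key walk the range cyclically. The following standalone sketch (plain Java, not Pig source; the range start, span, and reducer count are assumed values) reproduces just that walk:

public class RoundRobinSketch {
    public static void main(String[] args) {
        // assumed sampler output for one skewed key (hypothetical values)
        int first = 3, second = 2, totalReducers = 10;
        int curIndex = -1;
        for (int call = 0; call < 5; call++) {
            if (curIndex >= (first + second) || curIndex == -1) {
                curIndex = first;   // wrap back to the start of the range
            } else {
                curIndex++;         // advance to the next reducer in the range
            }
            System.out.println(curIndex % totalReducers); // prints 3, 4, 5, 3, 4
        }
    }
}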
@Override
public Result getNextTuple() throws ExecException {
    res = super.getNextTuple();
    if (writer == null) { // in the case of the combiner
        return res;
    }

    try {
        switch (res.returnStatus) {
        case POStatus.STATUS_OK:
            if (illustrator == null) {
                Tuple result = (Tuple) res.result;
                Byte index = (Byte) result.get(0);
                PigNullableWritable key =
                        HDataType.getWritableComparableTypes(result.get(1), keyType);
                NullableTuple val = new NullableTuple((Tuple) result.get(2));

                // Both the key and the value need the index. The key needs it so
                // that it can be sorted on the index in addition to the key
                // value. The value needs it so that POPackage can properly
                // assign the tuple to its slot in the projection.
                key.setIndex(index);
                val.setIndex(index);

                if (isSkewedJoin) {
                    // Wrap into a NullablePartitionWritable to match the key
                    // of the right table from POPartitionRearrangeTez for the
                    // skewed join
                    NullablePartitionWritable wrappedKey =
                            new NullablePartitionWritable(key);
                    wrappedKey.setPartition(-1);
                    key = wrappedKey;
                }
                writer.write(key, val);
            } else {
                illustratorMarkup(res.result, res.result, 0);
            }
            res = RESULT_EMPTY;
            break;
        case POStatus.STATUS_EOP:
        case POStatus.STATUS_ERR:
        case POStatus.STATUS_NULL:
        default:
            break;
        }
    } catch (IOException ioe) {
        int errCode = 2135;
        String msg = "Received error from POLocalRearrange function."
                + ioe.getMessage();
        throw new ExecException(msg, errCode, ioe);
    }
    return res;
}
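The partition marker set here is what drives the fast path in getPartition() above: -1 means "no precomputed partition", so the partitioner falls through to the sampler-based lookup, whereas keys from the streamed table carry a real index and are returned blindly. A minimal sketch of that marker (the key contents are hypothetical), assuming the NullablePartitionWritable and NullableTuple constructors used in the methods above:

import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.io.NullablePartitionWritable;
import org.apache.pig.impl.io.NullableTuple;

public class PartitionMarkerSketch {
    public static void main(String[] args) throws Exception {
        Tuple t = TupleFactory.getInstance().newTuple(1);
        t.set(0, "skewed-key"); // hypothetical key value
        NullablePartitionWritable wrapped =
                new NullablePartitionWritable(new NullableTuple(t));
        wrapped.setPartition(-1); // -1: let SkewedPartitioner consult the sampler
        System.out.println(wrapped.getPartition()); // -1
    }
}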
@Override
public void collect(Context oc, Tuple tuple)
        throws InterruptedException, IOException {
    Byte index = (Byte) tuple.get(0);
    PigNullableWritable key =
            HDataType.getWritableComparableTypes(tuple.get(1), keyType);
    NullableTuple val = new NullableTuple((Tuple) tuple.get(2));

    // Both the key and the value need the index. The key needs it so
    // that it can be sorted on the index in addition to the key
    // value. The value needs it so that POPackage can properly
    // assign the tuple to its slot in the projection.
    key.setIndex(index);
    val.setIndex(index);

    oc.write(key, val);
}
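Note that the index set on the key and value is the input number of the join/cogroup branch, not a field position. A minimal sketch of the tagging (hypothetical tuple contents), assuming the usual two-input layout where POPackage routes index 0 into the first bag and index 1 into the second:

import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.io.NullableTuple;

public class IndexTagSketch {
    public static void main(String[] args) throws Exception {
        Tuple t = TupleFactory.getInstance().newTuple(1);
        t.set(0, "from-input-0"); // hypothetical value
        NullableTuple tagged = new NullableTuple(t);
        tagged.setIndex((byte) 0); // input number 0, i.e. the first join input
        System.out.println(tagged.getIndex()); // 0
    }
}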
/**
 * Attaches the required inputs.
 *
 * @param k   the key being worked on
 * @param inp iterator of indexed tuples, typically obtained from Hadoop
 */
public void attachInput(PigNullableWritable k, Iterator<NullableTuple> inp) {
    tupIter = inp;
    key = k.getValueAsPigType();
    if (useSecondaryKey) {
        try {
            // with a secondary sort key the shuffle key is a compound tuple
            // (main key, secondary key); only field 0 is the grouping key
            key = ((Tuple) key).get(0);
        } catch (ExecException e) {
            // should not happen: the key is always a tuple when
            // useSecondaryKey is set
            throw new RuntimeException(e);
        }
    }
    if (isKeyTuple) {
        // the key is a tuple; cache it as a tuple for use in getNext()
        keyAsTuple = (Tuple) key;
    }
}
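The get(0) in the secondary-key branch relies on the compound key layout produced on the map side: a two-field tuple of (main key, secondary sort key), of which only field 0 participates in grouping. A small sketch of that layout (hypothetical values):

import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

public class SecondaryKeySketch {
    public static void main(String[] args) throws Exception {
        Tuple compound = TupleFactory.getInstance().newTuple(2);
        compound.set(0, "group-key"); // what attachInput() keeps as the key
        compound.set(1, 42);          // secondary sort key, dropped after sorting
        System.out.println(compound.get(0)); // group-key
    }
}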
private void runTest(Object key, boolean[] inner, byte keyType)
        throws ExecException, IOException {
    Random r = new Random();
    DataBag db1 = GenRandomData.genRandSmallTupDataBag(r, 10, 100);
    DataBag db2 = GenRandomData.genRandSmallTupDataBag(r, 10, 100);
    List<NullableTuple> db = new ArrayList<NullableTuple>(200);

    Iterator<Tuple> db1Iter = db1.iterator();
    if (!inner[0]) {
        while (db1Iter.hasNext()) {
            NullableTuple it = new NullableTuple(db1Iter.next());
            it.setIndex((byte) 0);
            db.add(it);
        }
    }
    Iterator<Tuple> db2Iter = db2.iterator();
    while (db2Iter.hasNext()) {
        NullableTuple it = new NullableTuple(db2Iter.next());
        it.setIndex((byte) 1);
        db.add(it);
    }

    POPackage pop = new POPackage(new OperatorKey("", r.nextLong()));
    pop.setNumInps(2);
    pop.getPkgr().setInner(inner);
    PigNullableWritable k = HDataType.getWritableComparableTypes(key, keyType);
    pop.attachInput(k, db.iterator());

    if (keyType != DataType.BAG) {
        // test serialization: write the wrapped key out and read it back
        NullablePartitionWritable wr;
        if (keyType == DataType.TUPLE) {
            BinSedesTuple tup = (BinSedesTuple) binfactory
                    .newTupleNoCopy(((Tuple) k.getValueAsPigType()).getAll());
            wr = new NullablePartitionWritable(new NullableTuple(tup));
        } else {
            wr = new NullablePartitionWritable(k);
        }
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(baos);
        wr.write(out);
        byte[] arr = baos.toByteArray();
        ByteArrayInputStream bais = new ByteArrayInputStream(arr);
        DataInputStream in = new DataInputStream(bais);
        NullablePartitionWritable re = new NullablePartitionWritable();
        re.readFields(in);
        assertEquals(wr, re);
    }

    // we are not doing any optimization to remove parts of the "value"
    // which are present in the "key" in this unit test - so set up the
    // "keyInfo" in the POPackage accordingly
    Map<Integer, Pair<Boolean, Map<Integer, Integer>>> keyInfo =
            new HashMap<Integer, Pair<Boolean, Map<Integer, Integer>>>();
    Pair<Boolean, Map<Integer, Integer>> p =
            new Pair<Boolean, Map<Integer, Integer>>(false, new HashMap<Integer, Integer>());
    keyInfo.put(0, p);
    keyInfo.put(1, p);
    pop.getPkgr().setKeyInfo(keyInfo);

    Result res = pop.getNextTuple();
    if (res.returnStatus == POStatus.STATUS_NULL && inner[0]) {
        return;
    }
    assertEquals(POStatus.STATUS_OK, res.returnStatus);

    Tuple t = (Tuple) res.result;
    Object outKey = t.get(0);
    DataBag outDb1 = (DataBag) t.get(1);
    DataBag outDb2 = (DataBag) t.get(2);
    assertEquals(key, outKey);
    assertTrue(TestHelper.compareBags(db1, outDb1));
    assertTrue(TestHelper.compareBags(db2, outDb2));
}
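A hypothetical driver for this helper might look like the following (the key value and flags are illustrative, not taken from the original suite). inner[i] == true marks input i as inner, meaning groups with an empty bag for that input are dropped:

// Hypothetical test method; assumes it lives in the same JUnit test class
// as runTest() above, with org.junit.Test imported.
@Test
public void testPackageCharArrayKey() throws Exception {
    runTest("some-key", new boolean[] { false, false }, DataType.CHARARRAY);
}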
/**
 * The reduce function, which packages the key and {@code List<Tuple>} into
 * key, {@code Bag<Tuple>} after converting the Hadoop-type key into a Pig
 * type. The packaged result is collected as-is if the reduce plan is empty,
 * and otherwise after passing through the reduce plan.
 */
@Override
protected void reduce(PigNullableWritable key, Iterable<NullableTuple> tupIter,
        Context context) throws IOException, InterruptedException {
    if (!initialized) {
        initialized = true;

        // cache the collector for use in runPipeline(),
        // which could additionally be called from close()
        this.outputCollector = context;
        pigReporter.setRep(context);
        PhysicalOperator.setReporter(pigReporter);

        boolean aggregateWarning = "true".equalsIgnoreCase(
                pigContext.getProperties().getProperty("aggregate.warning"));
        PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
        pigStatusReporter.setContext(new MRTaskContext(context));
        PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
        pigHadoopLogger.setReporter(pigStatusReporter);
        pigHadoopLogger.setAggregate(aggregateWarning);
        PhysicalOperator.setPigLogger(pigHadoopLogger);

        for (POStore store : stores) {
            MapReducePOStoreImpl impl = new MapReducePOStoreImpl(context);
            store.setStoreImpl(impl);
            store.setUp();
        }
    }

    // If the keyType is not a tuple, MapWithComparator.collect() will have
    // wrapped the key in a tuple so that the comparison UDF used in the
    // order-by can process it. Unwrap the key from the tuple before handing
    // it to the POPackage for processing.
    if (keyType != DataType.TUPLE) {
        Tuple t = (Tuple) key.getValueAsPigType();
        key = HDataType.getWritableComparableTypes(t.get(0), keyType);
    }

    pack.attachInput(key, tupIter.iterator());

    Result res = pack.getNextTuple();
    if (res.returnStatus == POStatus.STATUS_OK) {
        Tuple packRes = (Tuple) res.result;
        if (rp.isEmpty()) {
            context.write(null, packRes);
            return;
        }
        rp.attachInput(packRes);
        List<PhysicalOperator> leaves = rp.getLeaves();
        PhysicalOperator leaf = leaves.get(0);
        runPipeline(leaf);
    } else if (res.returnStatus == POStatus.STATUS_NULL) {
        return;
    } else if (res.returnStatus == POStatus.STATUS_ERR) {
        int errCode = 2093;
        String msg = "Encountered error in package operator while processing group.";
        throw new ExecException(msg, errCode, PigException.BUG);
    }
}
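The unwrap step mirrors what the map side does for an order-by with a comparison UDF: non-tuple keys travel wrapped in a one-field tuple and must be converted back before packaging. A standalone sketch of just that conversion (the chararray key value is hypothetical):

import org.apache.pig.backend.hadoop.HDataType;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.io.PigNullableWritable;

public class KeyUnwrapSketch {
    public static void main(String[] args) throws Exception {
        Tuple wrapped = TupleFactory.getInstance().newTuple(1);
        wrapped.set(0, "group-key"); // hypothetical key value
        PigNullableWritable key =
                HDataType.getWritableComparableTypes(wrapped.get(0), DataType.CHARARRAY);
        System.out.println(key.getValueAsPigType()); // group-key
    }
}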