/**
 * Dumps the grouped tuples to standard output for inspection.
 *
 * <p>Each key is printed on its own line, followed by one line per tuple
 * stored under that key. Tuple lines start with a tab and list the tuple's
 * fields separated by {@code ", "}.
 *
 * @param data map from key to the tuples associated with that key
 * @throws ExecException declared by this method; presumably raised while
 *         rendering a tuple as a delimited string
 */
public void printData(HashMap<Object, List<Tuple>> data) throws ExecException {
    for (Object key : data.keySet()) {
        System.out.println(key);

        List<Tuple> tuplesForKey = data.get(key);
        for (Tuple tuple : tuplesForKey) {
            String rendered = tuple.toDelimitedString(", ");
            System.out.println("\t" + rendered);
        }
    }
}
@Override public DataBag exec(Tuple input) throws IOException { retrieveContextValues(); ArrayList<String> joinKeyNames = new ArrayList<String>(); for (int i = 1; i < input.size(); i += 2) { joinKeyNames.add((String) input.get(i)); } JoinCollector collector = new JoinCollector(); // the first bag is the outer bag String leftBagName = bagNames.get(0); DataBag leftBag = getBag(input, leftBagName); String leftBagJoinKeyName = getPrefixedAliasName(bagNameToJoinKeyPrefix.get(leftBagName), joinKeyNames.get(0)); collector.setJoinData(collector.groupTuples(leftBag, leftBagJoinKeyName)); // now, for each additional bag, group up the tuples by the join key, then join them in if (bagNames.size() > 1) { for (int i = 1; i < bagNames.size(); i++) { String bagName = bagNames.get(i); DataBag bag = getBag(input, bagName); String joinKeyName = getPrefixedAliasName(bagNameToJoinKeyPrefix.get(bagName), joinKeyNames.get(i)); int tupleSize = bagNameToSize.get(bagName); if (bag == null) throw new IOException( "Error in instance: " + getInstanceName() + " with properties: " + getInstanceProperties() + " and tuple: " + input.toDelimitedString(", ") + " -- Expected bag, got null"); HashMap<Object, List<Tuple>> groupedData = collector.groupTuples(bag, joinKeyName); // outer join, so go back in and add nulls; groupedData = collector.insertNullTuples(groupedData, tupleSize); for (Map.Entry<Object, List<Tuple>> entry : groupedData.entrySet()) { collector.joinTuples(entry.getKey(), entry.getValue()); } } } // assemble output bag DataBag outputBag = BagFactory.getInstance().newDefaultBag(); for (List<Tuple> tuples : collector.getJoinData().values()) { for (Tuple tuple : tuples) { outputBag.add(tuple); } } return outputBag; }
/**
 * Checks that {@code ProtobufBytesToTuple} turns the serialized address book
 * fixture into a tuple whose comma-delimited rendering matches the expected
 * text.
 *
 * @throws IOException declared by this test; presumably raised by the conversion
 */
@Test
public void testProtoToPig() throws IOException {
    // NOTE(review): "[email protected]" looks like an obfuscated e-mail placeholder
    // rather than a real fixture value -- confirm against Fixtures.buildAddressBookProto().
    String expected =
        "{(Elephant Bird,123,[email protected],{(415-999-9999,HOME),(415-666-6666,MOBILE),(415-333-3333,WORK)}),(Elephant Bird,123,[email protected],{(415-999-9999,HOME),(415-666-6666,MOBILE),(415-333-3333,WORK)})},";

    // Serialize the fixture and wrap the bytes in a single-field tuple.
    AddressBook addressBook = Fixtures.buildAddressBookProto();
    DataByteArray serialized = new DataByteArray(addressBook.toByteArray());
    Tuple inputTuple = tf_.newTuple(serialized);

    ProtobufBytesToTuple converter =
        new ProtobufBytesToTuple(AddressBook.class.getCanonicalName());
    Tuple converted = converter.exec(inputTuple);

    assertEquals(expected, converted.toDelimitedString(","));
}