예제 #1
0
 public void printData(HashMap<Object, List<Tuple>> data) throws ExecException {
   for (Object o : data.keySet()) {
     System.out.println(o);
     for (Tuple t : data.get(o)) {
       System.out.println("\t" + t.toDelimitedString(", "));
     }
   }
 }
예제 #2
0
  @Override
  public DataBag exec(Tuple input) throws IOException {
    retrieveContextValues();

    ArrayList<String> joinKeyNames = new ArrayList<String>();
    for (int i = 1; i < input.size(); i += 2) {
      joinKeyNames.add((String) input.get(i));
    }

    JoinCollector collector = new JoinCollector();
    // the first bag is the outer bag
    String leftBagName = bagNames.get(0);
    DataBag leftBag = getBag(input, leftBagName);
    String leftBagJoinKeyName =
        getPrefixedAliasName(bagNameToJoinKeyPrefix.get(leftBagName), joinKeyNames.get(0));
    collector.setJoinData(collector.groupTuples(leftBag, leftBagJoinKeyName));
    // now, for each additional bag, group up the tuples by the join key, then join them in
    if (bagNames.size() > 1) {
      for (int i = 1; i < bagNames.size(); i++) {
        String bagName = bagNames.get(i);
        DataBag bag = getBag(input, bagName);
        String joinKeyName =
            getPrefixedAliasName(bagNameToJoinKeyPrefix.get(bagName), joinKeyNames.get(i));
        int tupleSize = bagNameToSize.get(bagName);
        if (bag == null)
          throw new IOException(
              "Error in instance: "
                  + getInstanceName()
                  + " with properties: "
                  + getInstanceProperties()
                  + " and tuple: "
                  + input.toDelimitedString(", ")
                  + " -- Expected bag, got null");
        HashMap<Object, List<Tuple>> groupedData = collector.groupTuples(bag, joinKeyName);
        // outer join, so go back in and add nulls;
        groupedData = collector.insertNullTuples(groupedData, tupleSize);
        for (Map.Entry<Object, List<Tuple>> entry : groupedData.entrySet()) {
          collector.joinTuples(entry.getKey(), entry.getValue());
        }
      }
    }

    // assemble output bag
    DataBag outputBag = BagFactory.getInstance().newDefaultBag();
    for (List<Tuple> tuples : collector.getJoinData().values()) {
      for (Tuple tuple : tuples) {
        outputBag.add(tuple);
      }
    }

    return outputBag;
  }
  @Test
  public void testProtoToPig() throws IOException {
    AddressBook abProto = Fixtures.buildAddressBookProto();

    Tuple abProtoTuple = tf_.newTuple(new DataByteArray(abProto.toByteArray()));

    ProtobufBytesToTuple abProtoToPig =
        new ProtobufBytesToTuple(AddressBook.class.getCanonicalName());
    Tuple abTuple = abProtoToPig.exec(abProtoTuple);
    assertEquals(
        "{(Elephant Bird,123,[email protected],{(415-999-9999,HOME),(415-666-6666,MOBILE),(415-333-3333,WORK)}),(Elephant Bird,123,[email protected],{(415-999-9999,HOME),(415-666-6666,MOBILE),(415-333-3333,WORK)})},",
        abTuple.toDelimitedString(","));
  }