Beispiel #1
0
  /**
   * Builds a bag with one single-field tuple per normalized word found in the document titles.
   *
   * @param o the document, cast to {@code DocumentMetadata}
   * @param lang when non-null, only titles whose language equals it (ignoring case) are used
   * @return the bag of word tuples; empty when nothing qualifies
   */
  @Override
  public DataBag extract(Object o, String lang) {
    DocumentMetadata metadata = (DocumentMetadata) o;
    DataBag words = new DefaultDataBag();
    DiacriticsRemover diacriticsRemover = new DiacriticsRemover();

    for (TextWithLanguage title : metadata.getBasicMetadata().getTitleList()) {
      boolean languageAccepted = lang == null || lang.equalsIgnoreCase(title.getLanguage());
      if (!languageAccepted) {
        continue;
      }

      String cleanedTitle = (String) diacriticsRemover.normalize(title.getText());
      if (cleanedTitle == null) {
        continue;
      }

      // Split on runs of non-word characters; blank fragments are dropped.
      for (String word : cleanedTitle.split("[\\W]+")) {
        if (!word.isEmpty()) {
          Object normalizedWord = normalizeExtracted(word);
          if (normalizedWord != null) {
            words.add(TupleFactory.getInstance().newTuple(normalizedWord));
          }
        }
      }
    }

    return words;
  }
Beispiel #2
0
  /**
   * Adds up the {@code Long} found in field 0 of every tuple of the bag held in {@code input}
   * field 0, skipping nulls.
   *
   * @param input tuple whose first field is the bag of values
   * @return the total, or {@code null} when the bag is empty or holds only nulls
   * @throws ExecException when reading a field fails, or (code 2103) when a runtime error
   *     occurs while reading or adding a value
   */
  protected static Long sumLongs(Tuple input) throws ExecException {
    // Intermediate results are Longs, not Integers, so the plain sum cannot be reused here.
    DataBag values = (DataBag) input.get(0);

    // An empty bag yields NULL, in line with the SQL standard.
    if (values.size() == 0) {
      return null;
    }

    long total = 0;
    boolean foundValue = false;
    Iterator<Tuple> tuples = values.iterator();
    while (tuples.hasNext()) {
      Tuple current = tuples.next();
      try {
        Long value = (Long) current.get(0);
        if (value != null) {
          foundValue = true;
          total += value;
        }
      } catch (RuntimeException exp) {
        throw new ExecException(
            "Problem while computing sum of longs.", 2103, PigException.BUG, exp);
      }
    }

    return foundValue ? Long.valueOf(total) : null;
  }
Beispiel #3
0
 /**
  * Initial stage of the sum: takes the first tuple of the bag in {@code input} field 0, reads
  * its field 0 as an {@code Integer} and passes it on widened to a {@code Long}.
  *
  * @param input tuple whose first field is a bag (expected to contain a single tuple)
  * @return a one-field tuple holding the value as {@code Long}, or null when the bag is empty
  *     or the value is null
  * @throws IOException an {@code ExecException} is rethrown as is; any other failure is wrapped
  *     in an {@code ExecException} with code 2106
  */
 @Override
 public Tuple exec(Tuple input) throws IOException {
   // Initial is called in the map - for SUM
   // we just send the tuple down
   try {
     // input is a bag with one tuple containing
     // the column we are trying to sum
     DataBag bg = (DataBag) input.get(0);
     Integer i = null;
     // Iterate once and stop after the first tuple, so the emptiness check and the read
     // share one iterator instead of opening two.
     for (Tuple tp : bg) {
       i = (Integer) tp.get(0);
       break;
     }
     return tfact.newTuple(i != null ? Long.valueOf(i) : null);
   } catch (NumberFormatException nfe) {
     // treat this particular input as null
     Tuple t = tfact.newTuple(1);
     t.set(0, null);
     return t;
   } catch (ExecException e) {
     throw e;
   } catch (Exception e) {
     int errCode = 2106;
     String msg = "Error while computing sum in " + this.getClass().getSimpleName();
     throw new ExecException(msg, errCode, PigException.BUG, e);
   }
 }
  /**
   * Feeds {@code inst} a (groupInfo, bag) input built from the first ten entries of {@code array}
   * and checks the first output field and the first three separated parts of the second.
   */
  @Test
  public void testCase1() throws IOException {
    TupleFactory tupleFactory = TupleFactory.getInstance();

    Tuple input = tupleFactory.newTuple(2);
    Tuple groupInfo = tupleFactory.newTuple(2);
    groupInfo.set(0, "column_3");
    groupInfo.set(1, Integer.valueOf(1));

    // Each row has four slots; only the first three are filled in.
    DataBag rows = new DefaultDataBag();
    int index = 0;
    while (index < 10) {
      Tuple row = tupleFactory.newTuple(4);
      row.set(0, Integer.valueOf(1));
      row.set(1, Integer.valueOf(1));
      row.set(2, array[index]);
      rows.add(row);
      index++;
    }

    input.set(0, groupInfo);
    input.set(1, rows);

    Tuple output = inst.exec(input);
    Assert.assertEquals(output.get(0), 1);

    String separator = String.valueOf(CalculateStatsUDF.CATEGORY_VAL_SEPARATOR);
    String[] parts = output.get(1).toString().split(separator);

    Assert.assertEquals(parts[0], "1");
    Assert.assertEquals(parts[1], "2");
    Assert.assertEquals(parts[2], "0");
  }
 /**
  * Reads a bag from the stream: skips to the opening '{', then repeatedly parses a tuple with
  * {@code consumeTuple} and skips to the next ',' or '}' until the closing '}' is reached.
  *
  * @param in stream positioned at or before the bag's opening brace
  * @param fieldSchema schema of the bag field; its nested schema must hold exactly one field,
  *     which is used as the schema of every tuple
  * @return a bag holding every non-null tuple returned by {@code consumeTuple}
  * @throws IOException when the schema is null, when the nested schema does not have exactly
  *     one field, or when the stream ends before the bag is closed
  */
 private DataBag consumeBag(PushbackInputStream in, ResourceFieldSchema fieldSchema)
     throws IOException {
   if (fieldSchema == null) {
     throw new IOException("Schema is null");
   }
   // NOTE(review): getSchema() is dereferenced without a null check - confirm it cannot be null.
   ResourceFieldSchema[] fss = fieldSchema.getSchema().getFields();
   Tuple t;
   int buf;
   // Discard everything up to and including the opening brace.
   while ((buf = in.read()) != '{') {
     if (buf == -1) {
       throw new IOException("Unexpect end of bag");
     }
   }
   // The schema shape is validated only after the opening brace has been consumed.
   if (fss.length != 1) throw new IOException("Only tuple is allowed inside bag schema");
   ResourceFieldSchema fs = fss[0];
   DataBag db = DefaultBagFactory.getInstance().newDefaultBag();
   while (true) {
     // A tuple parse is attempted before the closing brace is looked for; how an empty bag
     // is handled therefore depends on consumeTuple (not visible here).
     t = consumeTuple(in, fs);
     if (t != null) db.add(t);
     // Skip ahead to the separator (',') or the end of the bag ('}').
     while ((buf = in.read()) != '}' && buf != ',') {
       if (buf == -1) {
         throw new IOException("Unexpect end of bag");
       }
     }
     if (buf == '}') break;
   }
   return db;
 }
Beispiel #6
0
  /**
   * Wraps the input in a new bag. When {@code fieldType} is {@code DataType.MAP} the bag holds a
   * one-field tuple with the result of {@code createMap(input)}; otherwise it holds the input
   * tuple itself.
   *
   * @param input the tuple to wrap; null or empty yields an empty bag
   * @return the bag, never null
   * @throws RuntimeException wrapping any failure, with the original exception as its cause
   */
  @Override
  public DataBag exec(Tuple input) throws IOException {
    try {
      DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();

      if (input == null || input.size() == 0) {
        return bag; // an empty bag
      }

      if (this.fieldType == DataType.MAP) {
        Tuple t = DefaultTupleFactory.getInstance().newTuple(1);
        t.set(0, createMap(input));
        bag.add(t);
      } else {
        bag.add(input);
      }

      return bag;
    } catch (Exception e) {
      // Keep the original exception as the cause so the real failure stays diagnosable.
      throw new RuntimeException(
          "Error while computing size in " + this.getClass().getSimpleName(), e);
    }
  }
Beispiel #7
0
  /**
   * Computes, for each non-empty class label, its share of the total weight.
   *
   * <p>For every tuple of the bag in {@code input} field 0, field 3 is read as the class label
   * and field 2 as a {@code Double} weight. Tuples with a null or empty label are ignored.
   * A null weight on a labelled tuple fails with a {@code NullPointerException}.
   *
   * @param input tuple whose first field is the bag of items
   * @return a bag of (label, summed weight / total weight) tuples; if the total is zero the
   *     ratios follow floating-point division rules (NaN or infinity)
   * @throws IOException when a field cannot be read
   */
  @Override
  public DataBag exec(Tuple input) throws IOException {
    DataBag bag = (DataBag) input.get(0);
    HashMap<String, Double> clsCnt = new HashMap<String, Double>();
    // Primitive accumulator: avoids the deprecated Double constructor and per-item re-boxing.
    double sum = 0.0;
    for (Tuple item : bag) {
      String cls = (String) item.get(3);
      if (cls != null && cls.length() > 0) {
        Double cur = clsCnt.get(cls);
        Double inc = (Double) item.get(2);
        if (cur != null) {
          clsCnt.put(cls, cur + inc);
        } else {
          clsCnt.put(cls, inc);
        }
        sum += inc;
      }
    }

    DataBag result = bagFactory.newDefaultBag();
    for (Entry<String, Double> cls : clsCnt.entrySet()) {
      Tuple tpl = tupleFactory.newTuple(2);
      tpl.set(0, cls.getKey());
      tpl.set(1, cls.getValue() / sum);
      result.add(tpl);
    }

    return result;
  }
Beispiel #8
0
  /**
   * Totals the {@code Long} in field 0 of each tuple of the bag stored in {@code input} field 0.
   * Null values are skipped.
   *
   * @param input tuple whose first field is the bag to total
   * @return the total, or {@code null} for an empty bag or a bag containing only nulls
   * @throws ExecException when a field cannot be read, or (code 2103) when a runtime error
   *     occurs while reading or adding a value
   */
  protected static Long sum(Tuple input) throws ExecException, IOException {
    DataBag values = (DataBag) input.get(0);

    // Nothing to add up: report NULL rather than zero.
    if (values.size() == 0) {
      return null;
    }

    boolean anyValue = false;
    long running = 0;
    for (Tuple entry : values) {
      try {
        Long value = (Long) entry.get(0);
        if (value == null) {
          continue;
        }
        anyValue = true;
        running += value;
      } catch (RuntimeException exp) {
        String msg = "Problem while computing sum of longs.";
        throw new ExecException(msg, 2103, PigException.BUG, exp);
      }
    }

    if (!anyValue) {
      return null;
    }
    return Long.valueOf(running);
  }
Beispiel #9
0
    /**
     * Scores each sample of the bag in {@code input} field 0 with {@code Math.random()} and
     * emits the scored samples as intermediate tuples.
     *
     * <p>When the bag holds more than {@code numSamples} tuples, they are first offered to the
     * reservoir and only the reservoir's contents are emitted. A null bag yields an empty
     * output bag.
     *
     * @param input tuple whose first field is the bag of samples
     * @return a tuple wrapping the bag of intermediate tuples
     */
    @Override
    public Tuple exec(Tuple input) throws IOException {
      DataBag output = BagFactory.getInstance().newDefaultBag();
      DataBag samples = (DataBag) input.get(0);

      if (samples != null) {
        if (samples.size() > numSamples) {
          // Too many samples: let the reservoir decide which ones survive.
          for (Tuple candidate : samples) {
            getReservoir().consider(new ScoredTuple(Math.random(), candidate));
          }
          for (ScoredTuple kept : getReservoir()) {
            output.add(kept.getIntermediateTuple(tupleFactory));
          }
        } else {
          // Everything fits, so each sample is scored and passed straight through.
          for (Tuple candidate : samples) {
            ScoredTuple scored = new ScoredTuple(Math.random(), candidate);
            output.add(scored.getIntermediateTuple(tupleFactory));
          }
        }
      }

      return tupleFactory.newTuple(output);
    }
  /**
   * Serializes a sketch updated with "a", "a", "b" and checks that the function returns two
   * four-field rows, ("a", 2, 2, 2) followed by ("b", 1, 1, 1).
   */
  @Test
  public void exact() throws Exception {
    ItemsSketch<String> sketch = new ItemsSketch<String>(8);
    for (String item : new String[] {"a", "a", "b"}) {
      sketch.update(item);
    }

    DataByteArray serialized = new DataByteArray(sketch.toByteArray(new ArrayOfStringsSerDe()));
    EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
    DataBag bag = func.exec(PigUtil.objectsToTuple(serialized));
    Assert.assertNotNull(bag);
    Assert.assertEquals(bag.size(), 2);

    String[] expectedItems = {"a", "b"};
    long[] expectedCounts = {2L, 1L};
    Iterator<Tuple> rows = bag.iterator();
    for (int i = 0; i < expectedItems.length; i++) {
      Tuple row = rows.next();
      Assert.assertEquals(row.size(), 4);
      Assert.assertEquals((String) row.get(0), expectedItems[i]);
      // Fields 1 to 3 are all expected to equal the exact count.
      for (int field = 1; field <= 3; field++) {
        Assert.assertEquals((long) row.get(field), expectedCounts[i]);
      }
    }
  }
Beispiel #11
0
  /**
   * Splits the string in {@code input} field 0 into lower-cased tokens.
   *
   * <p>The text is trimmed and lower-cased, every {@code punctPattern} match is replaced by a
   * space, {@code spacePattern} matches are collapsed to one space, and the result is split on
   * {@code spacePattern}. Tokens longer than 30 characters and empty tokens are dropped.
   *
   * @param input tuple whose first field is the text
   * @return a bag of one-field token tuples, or null when the input or its text is null/empty
   * @throws IOException when the field cannot be read
   */
  public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
      return null;
    }

    String normStr = ((String) input.get(0));
    if (normStr == null) {
      return null;
    }

    // Remove punctuation except when it's a version number
    normStr = punctPattern.matcher(normStr.trim().toLowerCase()).replaceAll(" ");
    normStr = spacePattern.matcher(normStr).replaceAll(" ");

    DataBag output = bagFactory.newDefaultBag();
    for (String s : spacePattern.split(normStr.trim())) {
      // Splitting an empty string yields one empty element; do not emit it as a token.
      if (s.isEmpty() || s.length() > 30) {
        continue;
      }
      Tuple t = tupleFactory.newTuple(1);
      t.set(0, s);
      output.add(t);
    }

    return output;
  }
Beispiel #12
0
 /**
  * Copies the tuples currently held by the reservoir into a new bag.
  *
  * @return a new bag with the tuple of every scored entry in the reservoir
  */
 @Override
 public DataBag getValue() {
   DataBag selected = BagFactory.getInstance().newDefaultBag();
   for (ScoredTuple entry : getReservoir()) {
     Tuple original = entry.getTuple();
     selected.add(original);
   }
   return selected;
 }
 /**
  * Creates a bag containing the given number of tuples. The same one-field tuple instance
  * (holding the integer 1) is added each time.
  *
  * @param size number of tuples to add; zero or negative yields an empty bag
  * @return the populated bag
  */
 private DataBag createBag(int size) {
   DataBag bag = BagFactory.getInstance().newDefaultBag();

   Tuple shared = TupleFactory.getInstance().newTuple();
   shared.append(Integer.valueOf(1));

   int remaining = size;
   while (remaining > 0) {
     bag.add(shared);
     remaining--;
   }
   return bag;
 }
Beispiel #14
0
 /**
  * Returns the bag in {@code input} field 0 unchanged when it holds at most {@code numSamples}
  * tuples; otherwise delegates to the superclass implementation.
  *
  * @param input tuple whose first field is the bag of samples
  * @return the original bag or the superclass result
  */
 @Override
 public DataBag exec(Tuple input) throws IOException {
   DataBag samples = (DataBag) input.get(0);
   boolean smallEnough = samples.size() <= numSamples;
   return smallEnough ? samples : super.exec(input);
 }
 /** Checks that a serialized sketch with no updates produces a non-null, empty bag. */
 @Test
 public void emptySketch() throws Exception {
   ItemsSketch<String> untouched = new ItemsSketch<String>(8);
   DataByteArray serialized =
       new DataByteArray(untouched.toByteArray(new ArrayOfStringsSerDe()));

   EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
   DataBag result = func.exec(PigUtil.objectsToTuple(serialized));

   Assert.assertNotNull(result);
   Assert.assertEquals(result.size(), 0);
 }
Beispiel #16
0
  /**
   * Counts the tuples of the bag in {@code input} field 0 that are non-null, non-empty and have
   * a non-null first field.
   *
   * @param input tuple whose first field is the bag to count
   * @return the number of qualifying tuples
   * @throws ExecException when a field cannot be read
   */
  protected static long count(Tuple input) throws ExecException {
    DataBag values = (DataBag) input.get(0);
    long cnt = 0;
    // Typed iteration replaces the raw Iterator and its unchecked cast.
    for (Tuple t : values) {
      if (t != null && t.size() > 0 && t.get(0) != null) {
        cnt++;
      }
    }
    return cnt;
  }
  /**
   * Returns (key, codes, count) for documents carrying more classification codes than a limit.
   *
   * <p>Field 1 of {@code input} is read as a {@code DataByteArray} holding a serialized
   * {@code DocumentMetadata}; field 2 is read as the {@code Integer} limit. Every value of every
   * classification code is collected into a bag.
   *
   * @param input the input tuple; fields 1 and 2 are read. A null limit fails with a
   *     {@code NullPointerException} at the comparison.
   * @return a three-field tuple (document key, bag of codes, number of codes) when the number of
   *     codes exceeds the limit; otherwise null. Also null when the input is null or empty.
   * @throws IOException when a field cannot be read or the bytes cannot be parsed
   * @throws ClassCastException when field 1 is not a {@code DataByteArray} (logged first)
   */
  @Override
  public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
      return null;
    }

    Object obj = null;
    Integer limnum = null;
    try {
      // Read without casting: the cast is done below, where a failure is logged with the
      // actual field type before being rethrown.
      obj = input.get(1);
    } catch (ExecException e) {
      logger.error("Error in reading field proto:", e);
      throw e;
    }

    try {
      limnum = (Integer) input.get(2);
    } catch (ExecException e) {
      logger.error("Error in reading baglimit:", e);
      throw e;
    }

    DataByteArray dba = null;
    try {
      dba = (DataByteArray) obj;
    } catch (ClassCastException e) {
      logger.error("Error in casting Object (" + input.getType(1) + ") to DataByteArray:", e);
      throw e;
    }

    DocumentMetadata dm = null;
    try {
      dm = DocumentMetadata.parseFrom(dba.get());
    } catch (InvalidProtocolBufferException e) {
      logger.error("Error in reading ByteArray to DocumentMetadata:", e);
      throw e;
    }

    String key = dm.getKey();
    DataBag db = new DefaultDataBag();
    int bagsize = 0;
    for (ClassifCode code : dm.getBasicMetadata().getClassifCodeList()) {
      for (String co_str : code.getValueList()) {
        bagsize++;
        db.add(TupleFactory.getInstance().newTuple(co_str));
      }
    }
    if (bagsize > limnum) {
      Object[] to = new Object[] {key, db, bagsize};
      return TupleFactory.getInstance().newTuple(Arrays.asList(to));
    }
    return null;
  }
  /**
   * Joins the bags named in {@code bagNames} on their join keys and returns the joined tuples.
   *
   * <p>The join key names are taken from the odd positions (1, 3, 5, ...) of {@code input}. The
   * first bag seeds the collector; each further bag is grouped by its key, padded through
   * {@code insertNullTuples}, and joined in group by group.
   *
   * @param input tuple carrying the bags and the join key names
   * @return a bag with every tuple of the collector's join data
   * @throws IOException when one of the additional bags is null, or a field cannot be read
   */
  @Override
  public DataBag exec(Tuple input) throws IOException {
    retrieveContextValues();

    ArrayList<String> keyNames = new ArrayList<String>();
    int position = 1;
    while (position < input.size()) {
      keyNames.add((String) input.get(position));
      position += 2;
    }

    // The first bag is the outer bag and provides the initial join data.
    JoinCollector collector = new JoinCollector();
    String outerName = bagNames.get(0);
    DataBag outerBag = getBag(input, outerName);
    String outerKey =
        getPrefixedAliasName(bagNameToJoinKeyPrefix.get(outerName), keyNames.get(0));
    collector.setJoinData(collector.groupTuples(outerBag, outerKey));

    // Every remaining bag is grouped by its join key and merged into the collector.
    for (int index = 1; index < bagNames.size(); index++) {
      String currentName = bagNames.get(index);
      DataBag currentBag = getBag(input, currentName);
      String currentKey =
          getPrefixedAliasName(bagNameToJoinKeyPrefix.get(currentName), keyNames.get(index));
      int tupleSize = bagNameToSize.get(currentName);
      if (currentBag == null) {
        throw new IOException(
            "Error in instance: "
                + getInstanceName()
                + " with properties: "
                + getInstanceProperties()
                + " and tuple: "
                + input.toDelimitedString(", ")
                + " -- Expected bag, got null");
      }
      // Outer join: null tuples are inserted into the grouped data before joining.
      HashMap<Object, List<Tuple>> grouped =
          collector.insertNullTuples(collector.groupTuples(currentBag, currentKey), tupleSize);
      for (Map.Entry<Object, List<Tuple>> group : grouped.entrySet()) {
        collector.joinTuples(group.getKey(), group.getValue());
      }
    }

    // Flatten the joined groups into the output bag.
    DataBag joined = BagFactory.getInstance().newDefaultBag();
    for (List<Tuple> group : collector.getJoinData().values()) {
      for (Tuple row : group) {
        joined.add(row);
      }
    }
    return joined;
  }
  /**
   * Builds the per-row accumulator tuple for every column pair (i, j) with i &lt; j.
   *
   * <p>The row is the first tuple of the bag in {@code input} field 0 when that field is a
   * {@code DataBag}; otherwise {@code input} itself is the row. Each pair owns six consecutive
   * slots of the result: count, x, y, x*x, y*y, x*y.
   *
   * @param input a tuple wrapping a bag of rows, or the row itself
   * @return the accumulator created by {@code initTuple}, or null when {@code input} is null
   * @throws ExecException when a field cannot be read
   */
  private static Tuple buildInitialTupleForTheRow(Tuple input) throws ExecException {
    int numberOfTheColumns = 0;

    Tuple row = null;
    if (null == input) {
      return null;
    } else if (input.get(0) instanceof DataBag) {
      DataBag values = (DataBag) input.get(0);
      Iterator<Tuple> it = values.iterator();
      // NOTE(review): next() is called without hasNext(); assumes the bag is never empty.
      row = it.next();
      numberOfTheColumns = row.size();
    } else {
      numberOfTheColumns = input.size();
      row = input;
    }

    // NOTE(review): initTuple is presumably sized for six slots per column pair - confirm.
    Tuple vaTuple = initTuple(numberOfTheColumns);
    // Slot layout per pair, relative to the pair's base offset i6:
    //   +0 count, +1 x, +2 y, +3 x*x, +4 y*y, +5 x*y
    // i6 starts at -6 so that the first increment below lands on offset 0.
    int i6 = -6;
    for (int i = 0; i < numberOfTheColumns; i++) {
      for (int j = i + 1; j < numberOfTheColumns; j++) {
        i6 += 6;
        // Jeff: fix for pivotal41573093 - the count is recorded even when x or y is null.
        // count
        increaseTheValueOfElInTheTupleBy(vaTuple, i6, 1);

        // Pairs with a null member contribute to the count only.
        if (null == row.get(i) || null == row.get(j)) {
          continue;
        }

        Double x = DataType.toDouble(row.get(i));
        Double y = DataType.toDouble(row.get(j));

        // value x
        increaseTheValueOfElInTheTupleBy(vaTuple, i6 + 1, x);
        // value y
        increaseTheValueOfElInTheTupleBy(vaTuple, i6 + 2, y);
        // value xx
        increaseTheValueOfElInTheTupleBy(vaTuple, i6 + 3, x * x);
        // value yy
        increaseTheValueOfElInTheTupleBy(vaTuple, i6 + 4, y * y);
        // value xy
        increaseTheValueOfElInTheTupleBy(vaTuple, i6 + 5, x * y);
      }
    }

    return vaTuple;
  }
 /**
  * Hashes the first tuple of the bag in {@code input} field 0 with {@code MurmurHash.hash64}.
  * When the bag is empty, null is passed to the hash function.
  *
  * @param input tuple whose first field is a bag
  * @return a tuple wrapping the hash result
  */
 @Override
 public Tuple exec(Tuple input) throws IOException {
   Iterator<Tuple> rows = ((DataBag) input.get(0)).iterator();
   Tuple first = rows.hasNext() ? rows.next() : null;
   return mTupleFactory.newTuple((Object) MurmurHash.hash64(first));
 }
Beispiel #21
0
  /**
   * Merges partial (sum, count) tuples into a single (sum, count) tuple.
   *
   * <p>A null partial sum adds nothing to the total, but its count is still added. No check
   * for an empty bag is made.
   *
   * @param values bag of partial results; field 0 is a {@code Long} sum (may be null), field 1
   *     a {@code Long} count
   * @return a two-field tuple: the total (null when every partial sum was null) and the count
   * @throws ExecException when a field cannot be read
   */
  protected static Tuple combine(DataBag values) throws ExecException {
    Tuple output = mTupleFactory.newTuple(2);

    long total = 0;
    long rows = 0;
    boolean anySum = false;
    for (Tuple partial : values) {
      Long partialSum = (Long) partial.get(0);
      // Nulls count as zero towards the average: a departure from SQL that keeps COUNT()
      // cheap, since it can then be derived from the size of the bag.
      if (partialSum != null) {
        anySum = true;
        total += partialSum;
      }
      rows += (Long) partial.get(1);
    }

    output.set(0, anySum ? Long.valueOf(total) : null);
    output.set(1, Long.valueOf(rows));
    return output;
  }
  /**
   * Writes {@code MARKER} into the last field of the row when that field is null.
   *
   * <p>The row is the first tuple of the bag in {@code input} field 0 when that field is a
   * {@code DataBag}; otherwise {@code input} itself. A null input is ignored.
   *
   * @param input a tuple wrapping a bag of rows, or the row itself
   * @throws ExecException when a field cannot be read or written
   */
  private static void markTheTuple(Tuple input) throws ExecException {
    if (input == null) {
      return;
    }

    Tuple row = input;
    if (input.get(0) instanceof DataBag) {
      row = ((DataBag) input.get(0)).iterator().next();
    }

    int lastIndex = row.size() - 1;
    if (row.get(lastIndex) == null) {
      row.set(lastIndex, MARKER);
    }
  }
  /**
   * Runs a skewed join with {@code parallel 1} and drains its result; the test fails if any
   * exception is thrown while doing so.
   */
  public void testSkewedJoinReducers() throws IOException {
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");
    try {
      DataBag joined = BagFactory.getInstance().newDefaultBag();
      pigServer.registerQuery("C = join A by id, B by id using \"skewed\" parallel 1;");
      for (Iterator<Tuple> rows = pigServer.openIterator("C"); rows.hasNext(); ) {
        joined.add(rows.next());
      }
    } catch (Exception e) {
      fail("Should not throw exception, should continue execution");
    }
  }
  /**
   * Emits one (surname, document metadata bytes, author index) tuple per author of the document.
   *
   * <p>Field 0 of {@code input} is read as a {@code DataByteArray} and parsed into a
   * {@code DocumentWrapper}.
   *
   * @param input tuple whose first field holds the serialized document
   * @return a bag with one tuple per author, or null when the input is null or empty
   * @throws IOException wrapping any failure; the message contains the stack trace text and the
   *     original exception is attached as the cause
   */
  @Override
  public DataBag exec(Tuple input) throws IOException {

    if (input == null || input.size() == 0) {
      return null;
    }

    try {
      DataByteArray dba = null;
      try {
        dba = (DataByteArray) input.get(0);
      } catch (ExecException e) {
        logger.error("Error in reading field:", e);
        throw e;
      }

      DocumentWrapper dm = null;
      try {
        dm = DocumentWrapper.parseFrom(dba.get());
      } catch (Exception e) {
        logger.error("Error in reading ByteArray to DocumentMetadata:", e);
        throw e;
      }

      DataBag ret = new DefaultDataBag();
      // The same metadata byte array instance is shared by every emitted tuple.
      DataByteArray metadata = new DataByteArray(dm.getDocumentMetadata().toByteArray());

      List<Author> authors = dm.getDocumentMetadata().getBasicMetadata().getAuthorList();

      for (int i = 0; i < authors.size(); i++) {
        String sname = authors.get(i).getSurname();
        Object[] to = new Object[] {sname, metadata, i};
        Tuple t = TupleFactory.getInstance().newTuple(Arrays.asList(to));
        ret.add(t);
      }

      return ret;

    } catch (Exception e) {
      logger.error("Error in processing input row:", e);
      // Attach the cause so callers get the real exception, not only its text.
      throw new IOException(
          "Caught exception processing input row:\n" + StackTraceExtractor.getStackTrace(e), e);
    }
  }
Beispiel #25
0
  /**
   * Tokenizes the text in {@code input} field 0 with {@code PTBTokenizer} and returns the tokens
   * whose {@code value()} is longer than two characters.
   *
   * @param input tuple whose first field is the text (its {@code toString()} is tokenized)
   * @return a bag of one-field tuples holding each kept token's {@code word()}, or null when the
   *     input is null, empty, or has a null first field
   * @throws IOException when the field cannot be read
   */
  public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() < 1 || input.isNull(0)) return null;

    // Output bag
    DataBag bagOfTokens = bagFactory.newDefaultBag();

    StringReader textInput = new StringReader(input.get(0).toString());
    PTBTokenizer ptbt = new PTBTokenizer(textInput, new CoreLabelTokenFactory(), "");

    // No per-token stderr output: the former debug print wrote a line for every kept token.
    while (ptbt.hasNext()) {
      CoreLabel label = (CoreLabel) ptbt.next();
      // The length filter uses value(), while the emitted text is word().
      if (label.value().length() > 2) {
        Tuple termText = tupleFactory.newTuple(label.word());
        bagOfTokens.add(termText);
      }
    }
    return bagOfTokens;
  }
 /**
  * Feeds every non-null, non-empty tuple of the bag in {@code b} field 0 to the two-argument
  * {@code accumulate} overload.
  *
  * @param b tuple whose first field is the bag to accumulate
  * @throws IOException an {@code ExecException} is rethrown as is; any other failure is wrapped
  *     in an {@code ExecException} with code 2106
  */
 @Override
 public void accumulate(Tuple b) throws IOException {
   try {
     for (Tuple element : (DataBag) b.get(0)) {
       if (element == null || element.size() <= 0) {
         continue;
       }
       accumulate(element, this);
     }
   } catch (ExecException ee) {
     throw ee;
   } catch (Exception e) {
     String msg = "Error while computing min in " + this.getClass().getSimpleName();
     throw new ExecException(msg, 2106, PigException.BUG, e);
   }
 }
Beispiel #27
0
    /**
     * Offers every scored sample of every inner bag to the reservoir, then returns the original
     * tuples of the entries the reservoir holds.
     *
     * @param input tuple whose first field is a bag of tuples, each wrapping a bag of
     *     intermediate (scored) samples
     * @return a new bag with the tuples of the reservoir's entries
     */
    @Override
    public DataBag exec(Tuple input) throws IOException {
      for (Tuple partial : (DataBag) input.get(0)) {
        for (Tuple scoredSample : (DataBag) partial.get(0)) {
          // The score stored in the intermediate tuple is reused, not regenerated.
          getReservoir().consider(ScoredTuple.fromIntermediateTuple(scoredSample));
        }
      }

      DataBag selected = BagFactory.getInstance().newDefaultBag();
      for (ScoredTuple kept : getReservoir()) {
        selected.add(kept.getTuple());
      }
      return selected;
    }
Beispiel #28
0
  /**
   * Stores the first input block, initializes the operator from its schema, and prepares
   * {@code nullBag} as a bag holding a single zero-field tuple.
   *
   * @param input the input blocks; only the first value returned by the map's iterator is used
   * @param operatorJson operator configuration, passed to {@code init}
   * @param props not used by this method
   */
  @Override
  public void setInput(Map<String, Block> input, JsonNode operatorJson, BlockProperties props)
      throws IOException, InterruptedException {
    // NOTE(review): next() fails on an empty map; presumably callers always pass one block.
    inputBlock = input.values().iterator().next();

    init(operatorJson, inputBlock.getProperties().getSchema());

    nullBag = BagFactory.getInstance().newDefaultBag();

    // newTuple(0) creates a tuple with zero fields.
    nullBag.add(TupleFactory.getInstance().newTuple(0));
  }
  /**
   * Expects a three-way skewed join to be rejected: the test passes when registering or
   * iterating the join throws, and fails when it completes.
   */
  public void testSkewedJoin3Way() throws IOException {
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");
    pigServer.registerQuery("C = LOAD '" + INPUT_FILE3 + "' as (id, name);");

    boolean rejected = false;
    try {
      DataBag joined = BagFactory.getInstance().newDefaultBag();
      pigServer.registerQuery("D = join A by id, B by id, C by id using \"skewed\" parallel 5;");
      for (Iterator<Tuple> rows = pigServer.openIterator("D"); rows.hasNext(); ) {
        joined.add(rows.next());
      }
    } catch (Exception e) {
      // Any exception counts as the expected rejection.
      rejected = true;
    }

    if (!rejected) {
      fail("Should throw exception, do not support 3 way join");
    }
  }
  /**
   * Self-joins {@code INPUT_FILE5} with a skewed join and drains the result; the test fails
   * (after printing the error) if any exception is thrown.
   */
  public void testSkewedJoinNullKeys() throws IOException {
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE5 + "' as (id,name);");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE5 + "' as (id,name);");
    try {
      DataBag joined = BagFactory.getInstance().newDefaultBag();
      pigServer.registerQuery("C = join A by id, B by id using \"skewed\";");
      Iterator<Tuple> rows = pigServer.openIterator("C");
      while (rows.hasNext()) {
        Tuple row = rows.next();
        joined.add(row);
      }
    } catch (Exception e) {
      System.out.println(e.getMessage());
      e.printStackTrace();
      fail("Should support null keys in skewed join");
    }
  }