/**
   * See PIG-2936. Checks that tuples of every small arity are serialized in the specific way that
   * we expect: the expected bytes are assembled by hand (one arity-specific flag byte followed by
   * each field written through {@code bis.writeDatum}) and handed to {@code testSerTuple} together
   * with the tuple itself.
   */
  @Test
  public void testTupleSerializationSpecific() throws Exception {
    // Position in this array == number of fields in the tuple that should carry the flag.
    final byte[] expectedFlags = {
      BinInterSedes.TUPLE_0,
      BinInterSedes.TUPLE_1,
      BinInterSedes.TUPLE_2,
      BinInterSedes.TUPLE_3,
      BinInterSedes.TUPLE_4,
      BinInterSedes.TUPLE_5,
      BinInterSedes.TUPLE_6,
      BinInterSedes.TUPLE_7,
      BinInterSedes.TUPLE_8,
      BinInterSedes.TUPLE_9,
    };

    for (int arity = 0; arity < expectedFlags.length; arity++) {
      ByteArrayOutputStream expectedBytes = new ByteArrayOutputStream();
      DataOutput sink = new DataOutputStream(expectedBytes);
      sink.writeByte(expectedFlags[arity]);

      Tuple tuple = mTupleFactory.newTuple(arity);
      int field = 0;
      while (field < arity) {
        // The same random value goes into the expected bytes and into the tuple.
        Integer value = Integer.valueOf(random.nextInt());
        bis.writeDatum(sink, value);
        tuple.set(field, value);
        field++;
      }

      testSerTuple(tuple, expectedBytes.toByteArray());
    }
  }
Пример #2
0
    /**
     * Computes the average from the (sum, count) pair produced by {@code combine} over the bag in
     * field 0 of {@code input}.
     *
     * @param input tuple whose first field is the bag to combine
     * @return sum divided by count, or {@code null} when the combined sum is null or the count is
     *     not positive
     * @throws IOException an {@link ExecException} is rethrown as-is; any other failure is wrapped
     *     in an {@code ExecException} with error code 2106
     */
    @Override
    public Double exec(Tuple input) throws IOException {
      try {
        DataBag b = (DataBag) input.get(0);
        Tuple combined = combine(b);

        Long sum = (Long) combined.get(0);
        if (sum == null) {
          return null;
        }
        // Held as a double so the division below is floating point, not integer division.
        double count = (Long) combined.get(1);

        // Double.valueOf replaces the deprecated new Double(..) boxing constructor.
        return count > 0 ? Double.valueOf(sum / count) : null;
      } catch (ExecException ee) {
        throw ee;
      } catch (Exception e) {
        int errCode = 2106;
        String msg = "Error while computing average in " + this.getClass().getSimpleName();
        throw new ExecException(msg, errCode, PigException.BUG, e);
      }
    }
Пример #3
0
  /**
   * Turns a chararray field into a 14-digit number.
   *
   * <p>The characters {@code - + . ^ : ,} and space are stripped from the first field. The
   * remainder is cut to its first 14 characters when longer, or right-padded with {@code '0'} to
   * 14 characters when shorter, and then parsed as a long.
   *
   * @param input tuple whose first field must be of type chararray
   * @return the parsed value, or {@code null} when {@code input} is null or empty
   * @throws IOException an {@link ExecException} is rethrown as-is; any other failure (wrong field
   *     type, unparsable digits, ...) is wrapped in an {@code ExecException} with error code 2107
   */
  @Override
  public Long exec(Tuple input) throws IOException {
    try {
      if (input == null || input.size() == 0) {
        return null;
      }
      if (input.getType(0) != DataType.CHARARRAY) {
        throw new RuntimeException(
            "Input type expected to be chararray but got: " + input.getType(0));
      }

      String digits = ((String) input.get(0)).replaceAll("[-+.^:, ]", "");

      final int length = digits.length();
      if (length > 14) {
        digits = digits.substring(0, 14);
      } else if (length < 14) {
        // Left-justify in a 14 wide field, then turn the padding blanks into zeros.
        digits = String.format("%-14s", digits).replace(' ', '0');
      }
      return Long.parseLong(digits);

    } catch (ExecException exp) {
      throw exp;
    } catch (Exception e) {
      int errCode = 2107;
      String msg = "Error while computing date_format in " + this.getClass().getSimpleName();
      throw new ExecException(msg, errCode, PigException.BUG, e);
    }
  }
Пример #4
0
  /**
   * Runs the nested tasks with the tuple stored in field {@code fieldNumber} of the current tuple
   * installed as the tuple context. The context is reset afterwards even if a task fails.
   *
   * @throws BuildException if there is no current tuple, if the field is not a tuple, or if
   *     inspecting the field raises an {@code ExecException}
   */
  @Override
  protected void doHadoopWork() throws BuildException {
    final Tuple current = ContextManager.getCurrentTuple();
    if (current == null) {
      throw new BuildException(
          this.getTaskName()
              + " should be put inside task container which provides tuple to execution context");
    }

    try {
      // Both the declared type and the runtime class have to say "tuple".
      boolean holdsTuple =
          current.getType(fieldNumber) == DataType.TUPLE
              && current.get(fieldNumber) instanceof Tuple;
      if (!holdsTuple) {
        throw new BuildException("Tuple field " + fieldNumber + " doesn't represent a Tuple");
      }

      ContextManager.setCurrentTupleContext((Tuple) current.get(fieldNumber));
      try {
        for (Task nested : tasks) {
          nested.perform();
        }
      } finally {
        ContextManager.resetCurrentTupleContext();
      }
    } catch (ExecException e) {
      throw new BuildException("Failed to check type of tuple field " + fieldNumber, e);
    }
  }
Пример #5
0
 /**
  * Projects the key fields out of a tuple.
  *
  * @param t tuple to read from
  * @param keyFields positions of the fields to extract, in the order they should appear
  * @return a new tuple holding the selected fields
  * @throws ExecException if reading a field from {@code t} fails
  */
 private static Tuple extractKeys(Tuple t, List<Integer> keyFields) throws ExecException {
   final Tuple projected = new DefaultTuple();
   for (Integer position : keyFields) {
     Object field = t.get(position);
     projected.append(field);
   }
   return projected;
 }
Пример #6
0
  /**
   * Looks up the country name for the address held in the first field of {@code input}.
   *
   * @param input tuple whose first field is the address string
   * @return the country name reported by {@code geo.getCountryName}; {@code null} when the input
   *     is null or empty, the field is null, the lookup answers {@code null}, "--" or "N/A", or
   *     the lookup throws
   */
  @Override
  public String exec(Tuple input) throws IOException {
    // Nothing to look up without an address.
    if (input == null || input.size() == 0 || input.get(0) == null) {
      return null;
    }

    final String strAddress = (String) input.get(0);

    try {
      String country = this.geo.getCountryName(strAddress);

      // "--" and "N/A" are placeholders for "unknown"; null is friendlier to Pig.
      boolean unknown = country == null || country.equals("--") || country.equals("N/A");
      return unknown ? null : country;
    } catch (Exception ignored) {
      // Best effort: a failed lookup is reported as "no country".
      return null;
    }
  }
Пример #7
0
  /**
   * Splits the string in the first field of {@code input} into lower-cased tokens.
   *
   * <p>The text is trimmed and lower-cased, every match of {@code punctPattern} is replaced by a
   * blank, runs matched by {@code spacePattern} are collapsed to one blank, and the result is
   * split on {@code spacePattern}. Tokens longer than 30 characters are dropped.
   *
   * @param input tuple whose first field is the text to tokenize
   * @return a bag with one single-field tuple per kept token, or {@code null} when the input is
   *     null or empty or the field is null
   */
  public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
      return null;
    }

    String normStr = (String) input.get(0);
    if (normStr == null) {
      return null;
    }

    // Lower-case with a fixed locale so tokens do not depend on the JVM default locale
    // (e.g. Turkish dotless i). punctPattern is declared elsewhere; per the original note it is
    // meant to spare punctuation inside version numbers.
    normStr =
        punctPattern
            .matcher(normStr.trim().toLowerCase(java.util.Locale.ROOT))
            .replaceAll(" ");
    normStr = spacePattern.matcher(normStr).replaceAll(" ");

    DataBag output = bagFactory.newDefaultBag();
    for (String s : spacePattern.split(normStr.trim())) {
      if (s.length() <= 30) {
        Tuple t = tupleFactory.newTuple(1);
        t.set(0, s);
        output.add(t);
      }
    }

    return output;
  }
Пример #8
0
  /**
   * Adds up the Long found in field 0 of every tuple of the bag stored in field 0 of {@code input}.
   * Null values are skipped.
   *
   * @param input tuple whose first field is the bag of values
   * @return the total, or {@code null} when the bag is empty or holds only nulls
   * @throws ExecException with error code 2103 when reading or casting a value raises a
   *     {@code RuntimeException}
   */
  protected static Long sum(Tuple input) throws ExecException, IOException {
    DataBag values = (DataBag) input.get(0);

    // An empty bag has no sum at all.
    if (values.size() == 0) {
      return null;
    }

    long total = 0;
    boolean anyValue = false;
    for (Tuple row : values) {
      try {
        Long value = (Long) (row.get(0));
        if (value != null) {
          anyValue = true;
          total += value;
        }
      } catch (RuntimeException exp) {
        int errCode = 2103;
        String msg = "Problem while computing sum of longs.";
        throw new ExecException(msg, errCode, PigException.BUG, exp);
      }
    }

    return anyValue ? Long.valueOf(total) : null;
  }
    /**
     * Splits the bag in field 0 of {@code input} by drawing one uniform random key per item:
     * keys below {@code q1} put the item into the selected bag, keys in {@code [q1, q2)} park the
     * item, together with its key, in the sorted waiting bag, and all other items are dropped.
     *
     * @param input tuple whose first field is the bag of items
     * @return a tuple of (item count, selected bag, waiting bag); an empty tuple when the bag
     *     field is null
     */
    @Override
    public Tuple exec(Tuple input) throws IOException {
      Tuple output = tupleFactory.newTuple();
      DataBag selected = bagFactory.newDefaultBag();
      DataBag waiting = bagFactory.newSortedBag(new ScoredTupleComparator());

      DataBag items = (DataBag) input.get(0);
      if (items == null) {
        return output;
      }

      long n = items.size();
      final double q1 = getQ1(n, _samplingProbability);
      final double q2 = getQ2(n, _samplingProbability);

      for (Tuple item : items) {
        final double key = _rdg.nextUniform(0.0d, 1.0d);

        if (key < q1) {
          selected.add(item);
          continue;
        }
        if (key < q2) {
          Tuple scored = new ScoredTuple(key, item).getIntermediateTuple(tupleFactory);
          waiting.add(scored);
        }
      }

      output.append(n);
      output.append(selected);
      output.append(waiting);
      return output;
    }
Пример #10
0
  /**
   * Adds up the Long found in field 0 of every tuple of the bag stored in field 0 of {@code input}.
   * Null values are skipped.
   *
   * @param input tuple whose first field is the bag of intermediate Long results
   * @return the total, or {@code null} when the bag is empty or holds only nulls
   * @throws ExecException with error code 2103 when reading or casting a value raises a
   *     {@code RuntimeException}
   */
  protected static Long sumLongs(Tuple input) throws ExecException {
    // Can't just call sum, because the intermediate results are
    // now Longs instead of Integers.
    final DataBag values = (DataBag) input.get(0);

    // An empty bag yields NULL, in compliance with the SQL standard.
    if (values.size() == 0) {
      return null;
    }

    long running = 0;
    boolean foundValue = false;
    Iterator<Tuple> cursor = values.iterator();
    while (cursor.hasNext()) {
      Tuple current = cursor.next();
      try {
        Long addend = (Long) (current.get(0));
        if (addend != null) {
          foundValue = true;
          running += addend;
        }
      } catch (RuntimeException exp) {
        int errCode = 2103;
        String msg = "Problem while computing sum of longs.";
        throw new ExecException(msg, errCode, PigException.BUG, exp);
      }
    }

    if (!foundValue) {
      return null;
    }
    return Long.valueOf(running);
  }
Пример #11
0
  /**
   * Runs the {@code coalesceCastIntToDatetimeLazyTest} script on two input rows, one with a
   * millisecond timestamp and one with an empty second column, and checks the datetime produced
   * for each row of alias {@code data3}.
   */
  @Test
  public void coalesceCastIntToDatetimeLazyTest() throws Exception {
    PigTest test = createPigTestFromString(coalesceCastIntToDatetimeLazyTest);
    this.writeLinesToFile("input", "1,1375826183000", "2,");
    test.runScript();

    List<Tuple> lines = this.getLinesForAlias(test, "data3");
    Assert.assertEquals(2, lines.size());

    for (Tuple row : lines) {
      Integer testcase = (Integer) row.get(0);
      Assert.assertNotNull(testcase);

      final int caseId = testcase;
      if (caseId == 1) {
        // Row with an explicit timestamp: compare in UTC to be independent of the local zone.
        DateTime actual = ((DateTime) row.get(1)).toDateTime(DateTimeZone.UTC);
        Assert.assertEquals("2013-08-06T21:56:23.000Z", actual.toString());
      } else if (caseId == 2) {
        // Row without a timestamp: the script is expected to fall back to the epoch.
        Assert.assertEquals("1970-01-01T00:00:00.000Z", row.get(1).toString());
      } else {
        Assert.fail("Did not expect: " + row.get(1));
      }
    }
  }
 /**
  * Orders two tuples: a tuple with fewer fields sorts first; tuples of equal size are compared
  * field by field with {@code DataType.compare}, and the first difference decides. The sign is
  * flipped when the relevant sort direction is descending ({@code mAsc[0]} in whole-tuple mode,
  * otherwise {@code mAsc[i]} for the differing field). Sets {@code mHasNullField} whenever a
  * null field is seen on either side.
  *
  * @return negative, zero or positive as {@code t1} sorts before, equal to or after {@code t2}
  */
 private int compareTuple(Tuple t1, Tuple t2) {
   final int size1 = t1.size();
   final int size2 = t2.size();
   if (size1 != size2) {
     return size2 < size1 ? 1 : -1;
   }

   for (int field = 0; field < size1; field++) {
     try {
       Object left = t1.get(field);
       Object right = t2.get(field);
       if (left == null || right == null) {
         mHasNullField = true;
       }

       int result = DataType.compare(left, right);
       if (result == 0) {
         continue;
       }

       boolean ascending = mWholeTuple ? mAsc[0] : mAsc[field];
       return ascending ? result : -result;
     } catch (ExecException e) {
       throw new RuntimeException("Unable to compare tuples", e);
     }
   }
   return 0;
 }
Пример #13
0
  /**
   * Builds a map from a flat tuple laid out as (key0, value0, key1, value1, ...).
   *
   * <p>Keys are converted with {@code toString()}. A pair whose key is null is skipped, and a
   * trailing key without a value is ignored.
   *
   * @param input tuple of alternating keys and values; may be null or empty
   * @return the populated map; empty when {@code input} is null or empty
   * @throws IOException an {@code ExecException} with error code 2106 wrapping any failure
   */
  private HashMap<String, Object> createMap(Tuple input) throws IOException {

    try {

      HashMap<String, Object> map = new HashMap<String, Object>();

      if (input == null || input.size() == 0) {
        return map; // an empty map
      }

      // Only visit positions that still have a value after the key.
      for (int i = 0; i + 1 < input.size(); i += 2) {

        // Test the raw field for null BEFORE calling toString(); the previous code dereferenced
        // it first, so a null key blew up instead of being skipped.
        Object rawKey = input.get(i);
        String key = rawKey == null ? null : rawKey.toString();
        if (key != null) {
          map.put(key, input.get(i + 1));
        }
      }

      return map;

    } catch (Exception e) {

      int errCode = 2106;
      String msg = "Error while creating map with" + this.getClass().getSimpleName();
      throw new ExecException(msg, errCode, PigException.BUG, e);
    }
  }
Пример #14
0
  // See PIG-1434
  // A schema-less relation is used as a scalar through a positional ($1) projection.
  @Test
  public void testScalarWithNoSchemaDollarProj() throws Exception {
    String[] input = {"1\t5", "2\t10", "3\t20"};
    String[] scalarInput = {"1\t5"};

    String inputPath = BUILD_TEST_TMP + "table_testScalarWithNoSchemaDollarProj";
    String inputPathScalar = BUILD_TEST_TMP + "table_testScalarWithNoSchemaDollarProjScalar";
    TestScalarAliases.createLocalInputFile(inputPath, input);
    TestScalarAliases.createLocalInputFile(inputPathScalar, scalarInput);

    // Neither load declares a schema; "scalar" is the single-row relation used as a scalar.
    pigServer.registerQuery("A = LOAD '" + inputPath + "';");
    pigServer.registerQuery("scalar = LOAD '" + inputPathScalar + "';");
    pigServer.registerQuery("B = foreach A generate 5 / scalar.$1;");

    Iterator<Tuple> iter = pigServer.openIterator("B");

    // One output row per row of A, each expected to read "1".
    for (int row = 0; row < input.length; row++) {
      Tuple t = iter.next();
      assertTrue(t.get(0).toString().equals("1"));
    }

    assertFalse(iter.hasNext());
  }
Пример #15
0
  // See PIG-1434
  // A scalar computed by an aggregate is used inside a filter condition.
  @Test
  public void testScalarAliasesFilterClause() throws Exception {
    String[] input = {"1\t5", "2\t10", "3\t20", "4\t12", "5\t8"};
    String inputPath = BUILD_TEST_TMP + "table_testScalarAliasesFilterClause";
    TestScalarAliases.createLocalInputFile(inputPath, input);

    // Script mode: compute the average of the second column, then filter by it.
    pigServer.registerQuery("A = LOAD '" + inputPath + "' as (a0, a1);");
    pigServer.registerQuery("G = group A all;");
    pigServer.registerQuery("C = foreach G generate AVG(A.$1) as average;");
    pigServer.registerQuery("Y = filter A by a1 > C.average;");

    Iterator<Tuple> iter = pigServer.openIterator("Y");

    // The average is 11, so only the rows with 20 and 12 survive, in input order.
    String[] expectedRows = {"(3,20)", "(4,12)"};
    for (String expectedRow : expectedRows) {
      Tuple t = iter.next();
      assertTrue(t.toString().equals(expectedRow));
    }

    assertFalse(iter.hasNext());
  }
Пример #16
0
  /**
   * Creates a serialized S4 event given Pig data.
   *
   * <p>All field names in the input tuple must match the name of a setter method in the event. For
   * example, an input field named "value" will invoke the <tt>setValue</tt> method when creating an
   * event. Setters are always called in the order specified in the constructor. A <tt>null</tt>
   * value means the setter for that field is not called.
   *
   * <p>If the event cannot be instantiated, or invoking a setter fails (for example because of a
   * type mismatch), the stack trace is printed and <tt>null</tt> is returned; no exception is
   * raised for those failures.
   *
   * @param input Tuple of values for each field, in the order provided to the constructor.
   * @return Serialized version of the event, or <tt>null</tt> when the input is null, has fewer
   *     fields than there are setters, or the event could not be built.
   */
  public DataByteArray exec(Tuple input) throws IOException {
    if (input == null || input.size() < methods.size()) return null;

    // create empty event object
    Object event;
    try {
      event = eventClass.newInstance();
    } catch (Exception e) {
      e.printStackTrace();
      return null;
    }

    // iterate through fields setting values
    for (int i = 0; i < methods.size(); i++) {
      // Read the field once; a null value means "leave this setter alone".
      Object value = input.get(i);
      if (value == null) {
        continue;
      }

      try {
        MethodNamePair pair = methods.get(i);
        pair.method.invoke(event, value);
      } catch (Exception e) {
        e.printStackTrace();
        return null;
      }
    }

    // serialize event (the unused scratch tuple the old code allocated here is gone)
    byte[] rawEvent = serializer.serialize(event);
    return new DataByteArray(rawEvent);
  }
  /**
   * Feeds a sketch that has seen "a" twice and "b" once through
   * {@code FrequentStringsSketchToEstimates} and expects two four-field rows, "a" first, whose
   * three numeric fields all equal the true count of the item.
   */
  @Test
  public void exact() throws Exception {
    ItemsSketch<String> sketch = new ItemsSketch<String>(8);
    for (String item : new String[] {"a", "a", "b"}) {
      sketch.update(item);
    }

    EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
    DataByteArray serialized = new DataByteArray(sketch.toByteArray(new ArrayOfStringsSerDe()));
    Tuple inputTuple = PigUtil.objectsToTuple(serialized);

    DataBag bag = func.exec(inputTuple);
    Assert.assertNotNull(bag);
    Assert.assertEquals(bag.size(), 2);

    String[] expectedItems = {"a", "b"};
    long[] expectedCounts = {2L, 1L};

    Iterator<Tuple> it = bag.iterator();
    for (int row = 0; row < expectedItems.length; row++) {
      Tuple tuple = it.next();
      Assert.assertEquals(tuple.size(), 4);
      Assert.assertEquals((String) tuple.get(0), expectedItems[row]);
      Assert.assertEquals((long) tuple.get(1), expectedCounts[row]);
      Assert.assertEquals((long) tuple.get(2), expectedCounts[row]);
      Assert.assertEquals((long) tuple.get(3), expectedCounts[row]);
    }
  }
Пример #18
0
  /**
   * Wraps the input tuple in a bag.
   *
   * <p>When {@code fieldType} is {@code DataType.MAP}, the bag holds one single-field tuple whose
   * field is the map built by {@code createMap(input)}; otherwise the bag holds {@code input}
   * itself.
   *
   * @param input tuple to wrap; may be null or empty
   * @return the bag; empty when {@code input} is null or empty
   * @throws RuntimeException wrapping any failure, with the original exception as its cause
   */
  @Override
  public DataBag exec(Tuple input) throws IOException {
    try {

      DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();

      if (input == null || input.size() == 0) {
        return bag; // an empty bag
      }
      if (this.fieldType == DataType.MAP) {

        Tuple t = DefaultTupleFactory.getInstance().newTuple(1);
        t.set(0, createMap(input));

        bag.add(t);

      } else {
        bag.add(input);
      }

      return bag;

    } catch (Exception e) {
      // Keep the original exception as the cause so the real failure stays visible.
      throw new RuntimeException(
          "Error while computing size in " + this.getClass().getSimpleName(), e);
    }
  }
Пример #19
0
  /**
   * Appends the trimmed text of every leaf value reachable from {@code input} to {@code sb}, each
   * followed by {@code delim}. Tuples and bags are walked recursively; nulls and values whose
   * trimmed text is empty contribute nothing.
   *
   * @param sb buffer that receives the text
   * @param input a tuple, a bag, any other value, or null
   * @param delim text appended after every emitted value
   */
  public void cat(StringBuilder sb, Object input, String delim) throws IOException {
    if (input == null) {
      return;
    }

    if (input instanceof Tuple) {
      for (Object field : ((Tuple) input).getAll()) {
        cat(sb, field, delim);
      }
      return;
    }

    if (input instanceof DataBag) {
      for (Tuple row : (DataBag) input) {
        for (Object field : row.getAll()) {
          cat(sb, field, delim);
        }
      }
      return;
    }

    // Leaf value: emit its trimmed text unless nothing is left.
    String text = input.toString().trim();
    if (!text.isEmpty()) {
      sb.append(text).append(delim);
    }
  }
    /**
     * Merges the partial results found in the bag in field 0 of {@code input}. Each inner tuple
     * carries (item count, selected bag, waiting bag). All selected items are kept, and the
     * sample is then topped up from the sorted waiting bag until it holds
     * {@code ceil(_samplingProbability * n)} items or the waiting bag runs out.
     *
     * @param input tuple whose first field is the bag of partial results
     * @return the bag of sampled tuples
     */
    @Override
    public DataBag exec(Tuple input) throws IOException {
      DataBag partials = (DataBag) input.get(0);

      DataBag selected = bagFactory.newDefaultBag();
      DataBag waiting = bagFactory.newSortedBag(new ScoredTupleComparator());
      long n = 0L;

      for (Tuple partial : partials) {
        n += (Long) partial.get(0);
        selected.addAll((DataBag) partial.get(1));
        waiting.addAll((DataBag) partial.get(2));
      }

      final long sampleSize = (long) Math.ceil(_samplingProbability * n);
      long stillNeeded = sampleSize - selected.size();

      // Promote waiting items, in the sorted bag's order, while the sample is short.
      for (Tuple scored : waiting) {
        if (stillNeeded < 1) {
          break;
        }
        Tuple promoted = ScoredTuple.fromIntermediateTuple(scored).getTuple();
        selected.add(promoted);
        stillNeeded -= 1;
      }

      return selected;
    }
Пример #21
0
 /**
  * Initial (map-side) step of SUM: passes the value through, widened from Integer to Long.
  *
  * @param input tuple whose first field is a bag; field 0 of the bag's first tuple is the value
  * @return a single-field tuple holding the value as a Long, or holding null when the bag is
  *     empty or the value is null
  * @throws IOException an {@link ExecException} is rethrown as-is; any other failure is wrapped
  *     in an {@code ExecException} with error code 2106
  */
 @Override
 public Tuple exec(Tuple input) throws IOException {
   try {
     // The bag carries one tuple with the column being summed.
     DataBag bg = (DataBag) input.get(0);

     Long widened = null;
     if (bg.iterator().hasNext()) {
       Integer raw = (Integer) bg.iterator().next().get(0);
       if (raw != null) {
         widened = Long.valueOf(raw);
       }
     }
     return tfact.newTuple(widened);
   } catch (NumberFormatException nfe) {
     // Treat this particular input as null.
     Tuple nullResult = tfact.newTuple(1);
     nullResult.set(0, null);
     return nullResult;
   } catch (ExecException e) {
     throw e;
   } catch (Exception e) {
     int errCode = 2106;
     String msg = "Error while computing sum in " + this.getClass().getSimpleName();
     throw new ExecException(msg, errCode, PigException.BUG, e);
   }
 }
Пример #22
0
  /**
   * Adds a period to a datetime.
   *
   * @param input tuple of (datetime, period string); the string is handed to the {@code Period}
   *     constructor
   * @return the datetime shifted by the period, or {@code null} when {@code input} is null, has
   *     fewer than two fields, or its datetime field is null
   */
  @Override
  public DateTime exec(Tuple input) throws IOException {
    if (input == null || input.size() < 2) {
      return null;
    }

    DateTime start = (DateTime) input.get(0);
    if (start == null) {
      // A null datetime used to fail with a NullPointerException on plus(); answer null instead.
      return null;
    }

    return start.plus(new Period((String) input.get(1)));
  }
Пример #23
0
 /**
  * Dumps the map to standard output: each key on its own line, followed by one tab-indented,
  * comma-separated line per tuple stored under that key.
  *
  * @param data tuples grouped by key
  * @throws ExecException if a tuple cannot be rendered as a delimited string
  */
 public void printData(HashMap<Object, List<Tuple>> data) throws ExecException {
   for (Object key : data.keySet()) {
     System.out.println(key);

     List<Tuple> rows = data.get(key);
     for (Tuple row : rows) {
       String line = "\t" + row.toDelimitedString(", ");
       System.out.println(line);
     }
   }
 }
 /**
  * Creates a tuple with {@code size} columns, each holding the Integer 1.
  *
  * @param size number of columns the tuple should have
  * @return the populated tuple
  */
 private Tuple createTupleWithManyCols(int size) {
   // Start from an empty tuple: newTuple(size) already contains `size` null fields, so appending
   // to it produced a tuple of 2 * size columns whose first half was null.
   Tuple t = TupleFactory.getInstance().newTuple();
   Integer col = Integer.valueOf(1);
   for (int i = 0; i < size; i++) {
     t.append(col);
   }
   return t;
 }
  /**
   * Pulls the next metadata tuple from {@code matchingMetaBlock}, logs it to standard output and
   * turns it into a block via {@code generateVectorBlock}.
   *
   * @return the generated block, or {@code null} once the metadata block yields no more tuples
   */
  @Override
  public Block next() throws IOException, InterruptedException {
    final Tuple metaDataTuple = matchingMetaBlock.next();
    if (metaDataTuple != null) {
      System.out.println("Collate Vector: metadata tuple = " + metaDataTuple);
      return generateVectorBlock(metaDataTuple);
    }
    return null; // Done
  }
Пример #26
0
  /**
   * Rebuilds the full value tuple for one input, re-inserting any fields that are carried in the
   * key rather than in the value handed over in {@code ntup}.
   *
   * <p>Three cases, decided by the {@code keyInfo} entry for {@code index}:
   *
   * <ul>
   *   <li>the lookup map is non-empty: the result is stitched together position by position,
   *       taking mapped positions from the key and all others, in order, from the value;
   *   <li>the lookup map is empty and the project-star flag is set: the result is built from the
   *       fields of {@code keyAsTuple};
   *   <li>otherwise: the result is built from the fields of the value alone.
   * </ul>
   *
   * @param ntup wrapper whose {@code getValueAsPigType()} is cast to the value tuple
   * @param index position of this input's entry in {@code keyInfo}
   * @return a tuple created through {@code mTupleFactory} holding the complete value
   * @throws ExecException declared for the tuple field accesses performed here
   */
  protected Tuple getValueTuple(NullableTuple ntup, int index) throws ExecException {
    // Need to make a copy of the value, as hadoop uses the same ntup
    // to represent each value.
    Tuple val = (Tuple) ntup.getValueAsPigType();

    Tuple copy = null;
    // The "value (val)" that we just got may not
    // be the complete "value". It may have some portions
    // in the "key" (look in POLocalRearrange for more comments)
    // If this is the case we need to stitch
    // the "value" together.
    Pair<Boolean, Map<Integer, Integer>> lrKeyInfo = keyInfo.get(index);
    // first: project-star flag; second: output position -> index inside the key
    boolean isProjectStar = lrKeyInfo.first;
    Map<Integer, Integer> keyLookup = lrKeyInfo.second;
    int keyLookupSize = keyLookup.size();

    if (keyLookupSize > 0) {

      // we have some fields of the "value" in the
      // "key".
      copy = mTupleFactory.newTuple();
      // every output position comes either from the key or from the value
      int finalValueSize = keyLookupSize + val.size();
      int valIndex = 0; // an index for accessing elements from
      // the value (val) that we have currently
      for (int i = 0; i < finalValueSize; i++) {
        Integer keyIndex = keyLookup.get(i);
        if (keyIndex == null) {
          // the field for this index is not in the
          // key - so just take it from the "value"
          // we were handed
          copy.append(val.get(valIndex));
          valIndex++;
        } else {
          // the field for this index is in the key
          if (isKeyTuple) {
            // the key is a tuple, extract the
            // field out of the tuple
            copy.append(keyAsTuple.get(keyIndex));
          } else {
            // the key is a single value, so it is the field itself
            copy.append(key);
          }
        }
      }

    } else if (isProjectStar) {

      // the whole "value" is present in the "key"
      copy = mTupleFactory.newTuple(keyAsTuple.getAll());

    } else {

      // there is no field of the "value" in the
      // "key" - so just make a copy of what we got
      // as the "value"
      copy = mTupleFactory.newTuple(val.getAll());
    }
    return copy;
  }
 /**
  * Creates a bag holding the given number of tuples. Every entry is the same single-field tuple
  * instance whose field is the Integer 1.
  *
  * @param size number of tuples to put into the bag
  * @return the populated bag
  */
 private DataBag createBag(int size) {
   final DataBag bag = BagFactory.getInstance().newDefaultBag();

   final Tuple innerTuple = TupleFactory.getInstance().newTuple();
   innerTuple.append(Integer.valueOf(1));

   int remaining = size;
   while (remaining > 0) {
     bag.add(innerTuple);
     remaining--;
   }
   return bag;
 }
Пример #28
0
  /**
   * Fetches the next tuple from {@code block} and copies its leading columns into the shared
   * {@code outputTuple}: source column {@code i} lands in position {@code columnCopyMap[i]}.
   *
   * @return {@code outputTuple} (the same instance on every call), or {@code null} when the
   *     underlying block has no more tuples
   */
  @Override
  public Tuple next() throws IOException, InterruptedException {
    final Tuple source = block.next();
    if (source == null) {
      return null;
    }

    int sourceColumn = 0;
    for (int targetColumn : columnCopyMap) {
      outputTuple.set(targetColumn, source.get(sourceColumn));
      sourceColumn++;
    }

    return outputTuple;
  }
Пример #29
0
  /**
   * Emits the tuples of the first input bag for which no other input bag holds the same data.
   *
   * <p>All bags are loaded into a priority queue by {@code loadBags} and consumed one element at
   * a time from the head of the queue.
   * NOTE(review): this presumably relies on the queue ordering pairs by their data and on each
   * bag being sorted - confirm against {@code loadBags} and {@code Pair}.
   *
   * @param input tuple of at least two fields, each either null or a bag
   * @return the resulting bag; an empty bag when the first bag is null or empty; the first bag
   *     itself when there are exactly two inputs and the second is null or empty
   * @throws RuntimeException if fewer than two inputs are given or an input is not a bag
   */
  @SuppressWarnings("unchecked")
  @Override
  public DataBag exec(Tuple input) throws IOException {
    if (input.size() < 2) {
      throw new RuntimeException("Expected at least two inputs, but found " + input.size());
    }

    // every non-null field has to be a bag
    for (Object o : input) {
      if (o != null && !(o instanceof DataBag)) {
        throw new RuntimeException("Inputs must be bags");
      }
    }

    DataBag outputBag = bagFactory.newDefaultBag();

    DataBag bag1 = (DataBag) input.get(0);
    DataBag bag2 = (DataBag) input.get(1);

    // nothing to emit when the first bag has no data
    if (bag1 == null || bag1.size() == 0) {
      return outputBag;
    }
    // optimization
    // with only two inputs and nothing in the second, the first bag is already the answer
    else if (input.size() == 2 && (bag2 == null || bag2.size() == 0)) {
      return bag1;
    }

    PriorityQueue<Pair> pq = loadBags(input);

    // most recent tuple added to the output; null until something has been emitted
    Tuple lastData = null;

    while (true) {
      // look at the head without removing it, so countMatches still sees it in the queue
      Pair nextPair = pq.peek();

      // ignore data we've already encountered
      if (nextPair.data.compareTo(lastData) != 0) {
        // Only take data from the first bag, where there are no other
        // bags that have the same data.
        if (nextPair.index.equals(0) && countMatches(pq) == 0) {
          outputBag.add(nextPair.data);
          lastData = nextPair.data;
        }
      }

      Pair p = pq.poll();

      // only put the bag back into the queue if it still has data
      if (p.hasNext()) {
        p.next();
        pq.offer(p);
      } else if (p.index.equals(0)) {
        // stop when we exhaust all elements from the first bag
        break;
      }
    }

    return outputBag;
  }
Пример #30
0
 /**
  * Upper-cases the string in the first field of {@code input}.
  *
  * @param input tuple whose first field is the string to convert
  * @return the upper-cased string, or {@code null} when {@code input} is null or empty or the
  *     field itself is null
  * @throws IOException wrapping any exception raised while reading or converting the field
  */
 public String exec(Tuple input) throws IOException {
   if (input == null || input.size() == 0) return null;
   try {
     String str = (String) input.get(0);
     if (str == null) {
       // A null field used to fail with a wrapped NullPointerException; null in, null out.
       return null;
     }
     return str.toUpperCase();
   } catch (Exception e) {
     throw WrappedIOException.wrap("Caught exception processing input row ", e);
   }
 }