/**
 * Picks the reducer index for a key.
 *
 * <p>A key that already carries a partition (anything other than -1) is routed to that partition
 * as-is. Otherwise the key is looked up in {@code reducerMap}: keys without an entry are hashed
 * across {@code totalReducers}; keys with an entry are handed out one index after another,
 * starting at {@code first} and restarting there once the previous index has reached
 * {@code first + second}.
 *
 * @param wrappedKey the key to route; past the first check it is cast to
 *     NullablePartitionWritable unconditionally
 * @param value the value travelling with the key (not consulted)
 * @param numPartitions fallback reducer count, used when {@code totalReducers} is not positive
 * @return the chosen partition, or -1 if the key value could not be stored in the lookup tuple
 */
@Override
  public int getPartition(PigNullableWritable wrappedKey, Writable value, int numPartitions) {
    // for streaming tables the partition index is already decided; hand it back blindly
    if (wrappedKey instanceof NullablePartitionWritable) {
      int preassigned = ((NullablePartitionWritable) wrappedKey).getPartition();
      if (preassigned != -1) {
        return preassigned;
      }
    }

    // for the partition table, derive the index from the sampler output
    Tuple lookupKey = TupleFactory.getInstance().newTuple(1);

    // pull the real key out of the NullablePartitionWritable wrapper
    PigNullableWritable key = ((NullablePartitionWritable) wrappedKey).getKey();
    Object keyValue = key.getValueAsPigType();

    try {
      lookupKey.set(0, keyValue);
    } catch (ExecException e) {
      return -1;
    }

    // a non-null tuple key is used directly rather than nested inside the one-field tuple
    if (keyValue != null && key instanceof NullableTuple) {
      lookupKey = (Tuple) keyValue;
    }

    // if the partition file is empty, fall back to numPartitions
    if (totalReducers <= 0) {
      totalReducers = numPartitions;
    }

    Pair<Integer, Integer> range = reducerMap.get(lookupKey);
    if (range == null) {
      // key unknown to the reducerMap: default hash based partitioning
      return Math.abs(lookupKey.hashCode() % totalReducers);
    }

    // -1 marks "no index handed out for this key yet"
    int previous = currentIndexMap.containsKey(lookupKey) ? currentIndexMap.get(lookupKey) : -1;

    int next;
    if (previous >= (range.first + range.second) || previous == -1) {
      next = range.first;
    } else {
      next = previous + 1;
    }

    // remember the index so the following record for this key moves on from it
    currentIndexMap.put(lookupKey, next);
    return next % totalReducers;
  }
Example #2
0
  /**
   * Fetches the next tuple from the parent implementation and, when a writer is attached, emits
   * it as a key/value pair instead of handing it to the caller.
   *
   * <p>With no writer (the combiner case) the parent's result is returned untouched. With a
   * writer, a {@code STATUS_OK} tuple is split into index (field 0), key (field 1) and value
   * (field 2), written out (or passed to the illustrator markup when an illustrator is set), and
   * the result is replaced by {@code RESULT_EMPTY}. Any other status is passed through unchanged.
   *
   * @return the parent's result, or {@code RESULT_EMPTY} once an OK tuple has been consumed here
   * @throws ExecException with error code 2135 if processing or writing the tuple raises an
   *     {@link IOException}
   */
  @Override
  public Result getNextTuple() throws ExecException {
    res = super.getNextTuple();
    if (writer == null) { // In the case of combiner
      return res;
    }

    try {
      switch (res.returnStatus) {
        case POStatus.STATUS_OK:
          if (illustrator == null) {
            Tuple result = (Tuple) res.result;
            Byte index = (Byte) result.get(0);
            PigNullableWritable key = HDataType.getWritableComparableTypes(result.get(1), keyType);
            NullableTuple val = new NullableTuple((Tuple) result.get(2));

            // Both the key and the value need the index.  The key needs it so
            // that it can be sorted on the index in addition to the key
            // value.  The value needs it so that POPackage can properly
            // assign the tuple to its slot in the projection.
            key.setIndex(index);
            val.setIndex(index);
            if (isSkewedJoin) {
              // Wrap into a NullablePartitionWritable to match the key
              // of the right table from POPartitionRearrangeTez for the skewed join
              NullablePartitionWritable wrappedKey = new NullablePartitionWritable(key);
              wrappedKey.setPartition(-1);
              key = wrappedKey;
            }
            writer.write(key, val);
          } else {
            illustratorMarkup(res.result, res.result, 0);
          }
          // The tuple has been consumed here, so the caller gets an empty result.
          res = RESULT_EMPTY;
          break;
        case POStatus.STATUS_EOP:
        case POStatus.STATUS_ERR:
        case POStatus.STATUS_NULL:
        default:
          // Nothing to write; the status is handed back to the caller as-is.
          break;
      }
    } catch (IOException ioe) {
      int errCode = 2135;
      String msg = "Received error from POLocalRearrage function." + ioe.getMessage();
      throw new ExecException(msg, errCode, ioe);
    }
    // Return the result this call actually produced (previously an unrelated `inp` was
    // returned, which discarded both the status and the RESULT_EMPTY replacement above).
    return res;
  }
    /**
     * Turns an (index, key, value) tuple into a writable key/value pair and writes it to the
     * given context.
     *
     * @param oc the context the pair is written to
     * @param tuple field 0 is the index (a {@code Byte}), field 1 the key, field 2 the value tuple
     * @throws InterruptedException propagated from the context write
     * @throws IOException if a tuple field cannot be read or the write fails
     */
    @Override
    public void collect(Context oc, Tuple tuple) throws InterruptedException, IOException {
      final Byte slot = (Byte) tuple.get(0);
      final PigNullableWritable outKey =
          HDataType.getWritableComparableTypes(tuple.get(1), keyType);
      final NullableTuple outVal = new NullableTuple((Tuple) tuple.get(2));

      // The index is stamped on both halves of the pair: on the key so sorting can take
      // the index into account besides the key value, and on the value so POPackage can
      // place the tuple into the right slot of the projection.
      outKey.setIndex(slot);
      outVal.setIndex(slot);

      oc.write(outKey, outVal);
    }
Example #4
0
 /**
  * Attaches the inputs for the next group.
  *
  * <p>Stores the iterator and the key's Pig value. When a secondary key is in use, the stored
  * key is replaced by field 0 of that value; when the key is a tuple, it is also cached in
  * {@code keyAsTuple}.
  *
  * @param k - the key being worked on
  * @param inp - iterator of indexed tuples typically obtained from Hadoop
  * @throws RuntimeException wrapping the ExecException raised while reading field 0 of the key
  */
 public void attachInput(PigNullableWritable k, Iterator<NullableTuple> inp) {
   tupIter = inp;
   key = k.getValueAsPigType();

   if (useSecondaryKey) {
     // with a secondary key the value is a compound tuple; only its first field is kept
     final Tuple compoundKey = (Tuple) key;
     try {
       key = compoundKey.get(0);
     } catch (ExecException e) {
       // TODO Exception
       throw new RuntimeException(e);
     }
   }

   if (isKeyTuple) {
     // keep a Tuple-typed view of the key around for use in the getNext()
     keyAsTuple = (Tuple) key;
   }
 }
  /**
   * Feeds two randomly generated bags to a two-input POPackage under the given key and checks
   * the packaged output.
   *
   * <p>Tuples of the first bag are only supplied when {@code inner[0]} is false. For every key
   * type except BAG the key is additionally round-tripped through NullablePartitionWritable
   * serialization. If the package returns STATUS_NULL while {@code inner[0]} is set, the test
   * ends without further checks.
   *
   * @param key the group key handed to the package
   * @param inner inner flags passed to the packager; {@code inner[0]} also controls the input
   * @param keyType Pig data type of {@code key}
   */
  private void runTest(Object key, boolean inner[], byte keyType)
      throws ExecException, IOException {
    Random rand = new Random();
    DataBag firstBag = GenRandomData.genRandSmallTupDataBag(rand, 10, 100);
    DataBag secondBag = GenRandomData.genRandSmallTupDataBag(rand, 10, 100);

    // Build the indexed input: first-bag tuples get index 0, second-bag tuples get index 1.
    List<NullableTuple> packageInput = new ArrayList<NullableTuple>(200);
    Iterator<Tuple> firstIter = firstBag.iterator();
    if (!inner[0]) {
      for (; firstIter.hasNext(); ) {
        NullableTuple indexed = new NullableTuple(firstIter.next());
        indexed.setIndex((byte) 0);
        packageInput.add(indexed);
      }
    }
    for (Iterator<Tuple> secondIter = secondBag.iterator(); secondIter.hasNext(); ) {
      NullableTuple indexed = new NullableTuple(secondIter.next());
      indexed.setIndex((byte) 1);
      packageInput.add(indexed);
    }

    POPackage pkg = new POPackage(new OperatorKey("", rand.nextLong()));
    pkg.setNumInps(2);
    pkg.getPkgr().setInner(inner);
    PigNullableWritable k = HDataType.getWritableComparableTypes(key, keyType);
    pkg.attachInput(k, packageInput.iterator());

    if (keyType != DataType.BAG) {
      // serialization round trip of the key wrapped in a NullablePartitionWritable
      NullablePartitionWritable written;
      if (keyType == DataType.TUPLE) {
        BinSedesTuple binTuple =
            (BinSedesTuple) binfactory.newTupleNoCopy(((Tuple) k.getValueAsPigType()).getAll());
        written = new NullablePartitionWritable(new NullableTuple(binTuple));
      } else {
        written = new NullablePartitionWritable(k);
      }

      ByteArrayOutputStream buffer = new ByteArrayOutputStream();
      written.write(new DataOutputStream(buffer));

      DataInputStream source =
          new DataInputStream(new ByteArrayInputStream(buffer.toByteArray()));
      NullablePartitionWritable restored = new NullablePartitionWritable();
      restored.readFields(source);
      assertEquals(restored, written);
    }

    // This unit test does no optimization that strips parts of the "value" which are
    // already present in the "key", so both inputs share one "keyInfo" entry saying so.
    Pair<Boolean, Map<Integer, Integer>> nothingInKey =
        new Pair<Boolean, Map<Integer, Integer>>(false, new HashMap<Integer, Integer>());
    Map<Integer, Pair<Boolean, Map<Integer, Integer>>> keyInfo =
        new HashMap<Integer, Pair<Boolean, Map<Integer, Integer>>>();
    for (int input = 0; input < 2; input++) {
      keyInfo.put(input, nothingInKey);
    }
    pkg.getPkgr().setKeyInfo(keyInfo);

    Result packaged = pkg.getNextTuple();
    if (packaged.returnStatus == POStatus.STATUS_NULL && inner[0]) {
      return;
    }
    assertEquals(POStatus.STATUS_OK, packaged.returnStatus);

    // Output layout checked below: field 0 = key, field 1 = first bag, field 2 = second bag.
    Tuple output = (Tuple) packaged.result;
    Object actualKey = output.get(0);
    DataBag actualFirst = (DataBag) output.get(1);
    DataBag actualSecond = (DataBag) output.get(2);

    assertEquals(key, actualKey);
    assertTrue(TestHelper.compareBags(firstBag, actualFirst));
    assertTrue(TestHelper.compareBags(secondBag, actualSecond));
  }
    /**
     * Packages one reduce group: the Hadoop key and its tuples are handed to the package
     * operator, and the packaged tuple is either written out directly (empty reduce plan) or
     * pushed through the reduce plan.
     *
     * <p>The first call also performs one-time setup of the reporter, the logger and the stores.
     * For non-tuple key types the incoming key is a one-field tuple wrapper and is unwrapped
     * before packaging.
     *
     * @param key the group key as delivered by Hadoop
     * @param tupIter the tuples belonging to {@code key}
     * @param context the reduce context, used for output and reporting
     * @throws IOException if unwrapping, packaging or writing fails; in particular an
     *     ExecException with error code 2093 when the package operator reports STATUS_ERR
     * @throws InterruptedException propagated from the context write
     */
    @Override
    protected void reduce(PigNullableWritable key, Iterable<NullableTuple> tupIter, Context context)
        throws IOException, InterruptedException {

      if (!initialized) {
        initialized = true;

        // runPipeline(), which could additionally be called from close(),
        // relies on this cached collector
        this.outputCollector = context;
        pigReporter.setRep(context);
        PhysicalOperator.setReporter(pigReporter);

        String aggregateSetting = pigContext.getProperties().getProperty("aggregate.warning");
        PigStatusReporter statusReporter = PigStatusReporter.getInstance();
        statusReporter.setContext(new MRTaskContext(context));
        PigHadoopLogger hadoopLogger = PigHadoopLogger.getInstance();
        hadoopLogger.setReporter(statusReporter);
        hadoopLogger.setAggregate(Boolean.parseBoolean(aggregateSetting));
        PhysicalOperator.setPigLogger(hadoopLogger);

        for (POStore store : stores) {
          store.setStoreImpl(new MapReducePOStoreImpl(context));
          store.setUp();
        }
      }

      // For a non-tuple keyType, MapWithComparator.collect() would have wrapped
      // the key into a tuple so the comparison UDF used in the order by can
      // process it. Take the key back out of that tuple before it goes to the
      // POPackage.
      if (keyType != DataType.TUPLE) {
        Tuple wrapper = (Tuple) key.getValueAsPigType();
        key = HDataType.getWritableComparableTypes(wrapper.get(0), keyType);
      }

      pack.attachInput(key, tupIter.iterator());
      Result packaged = pack.getNextTuple();

      if (packaged.returnStatus == POStatus.STATUS_OK) {
        Tuple group = (Tuple) packaged.result;

        // no reduce plan: the packaged tuple is the output
        if (rp.isEmpty()) {
          context.write(null, group);
          return;
        }

        rp.attachInput(group);
        List<PhysicalOperator> planLeaves = rp.getLeaves();
        runPipeline(planLeaves.get(0));
      }

      if (packaged.returnStatus == POStatus.STATUS_NULL) {
        return;
      }

      if (packaged.returnStatus == POStatus.STATUS_ERR) {
        throw new ExecException(
            "Encountered error in package operator while processing group.",
            2093,
            PigException.BUG);
      }
    }