Example #1
    /**
     * The reduce function, which packages the key and List<Tuple> into key, Bag<Tuple>
     * after converting the Hadoop key type into a Pig type. The packaged result is collected
     * as-is if the reduce plan is empty, or after passing through the reduce plan otherwise.
     */
    @Override
    protected void reduce(PigNullableWritable key, Iterable<NullableTuple> tupIter, Context context)
        throws IOException, InterruptedException {

      if (!initialized) {
        initialized = true;

        // cache the collector for use in runPipeline()
        // which could additionally be called from close()
        this.outputCollector = context;
        pigReporter.setRep(context);
        PhysicalOperator.setReporter(pigReporter);

        // wire Pig's status reporter and logger into the physical
        // operators, honoring the aggregate.warning property
        boolean aggregateWarning =
            "true".equalsIgnoreCase(pigContext.getProperties().getProperty("aggregate.warning"));
        PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
        pigStatusReporter.setContext(new MRTaskContext(context));
        PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
        pigHadoopLogger.setReporter(pigStatusReporter);
        pigHadoopLogger.setAggregate(aggregateWarning);
        PhysicalOperator.setPigLogger(pigHadoopLogger);

        if (!inIllustrator) {
          for (POStore store : stores) {
            MapReducePOStoreImpl impl = new MapReducePOStoreImpl(context);
            store.setStoreImpl(impl);
            store.setUp();
          }
        }
      }

      // When the join is optimized, POPackage and POForeach are
      // combined into POJoinPackage, so a single getNext() call
      // can yield many tuples. In that case, process until
      // POJoinPackage.getNext() returns EOP.
      if (pack.getPkgr() instanceof JoinPackager) {
        pack.attachInput(key, tupIter.iterator());
        while (true) {
          // processOnePackageOutput() returns true once it sees EOP
          if (processOnePackageOutput(context)) break;
        }
      } else {
        // the join is not optimized, so the package will
        // produce only one tuple for this key
        pack.attachInput(key, tupIter.iterator());
        processOnePackageOutput(context);
      }
    }
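
Both branches above delegate to processOnePackageOutput(), which is not shown here. Below is a minimal sketch of what it plausibly does, assuming the fields pack and rp and the helper runPipeline() behave as in Example #2; the error code and message mirror those visible there, but treat this as an illustration rather than the verbatim implementation:

    // Sketch (assumed): pulls one packaged tuple from POPackage, runs it
    // through the reduce plan if one exists, and reports whether EOP was
    // reached so the JoinPackager loop knows when to stop.
    protected boolean processOnePackageOutput(Context oc)
        throws IOException, InterruptedException {
      Result res = pack.getNextTuple();
      if (res.returnStatus == POStatus.STATUS_OK) {
        Tuple packRes = (Tuple) res.result;
        if (rp.isEmpty()) {
          // no reduce plan: collect the packaged tuple as-is
          oc.write(null, packRes);
          return false;
        }
        // otherwise push it through the reduce plan
        rp.attachInput(packRes);
        runPipeline(rp.getLeaves().get(0));
      }
      if (res.returnStatus == POStatus.STATUS_ERR) {
        throw new ExecException(
            "Encountered error in package operator while processing group.",
            2093, PigException.BUG);
      }
      // true only when POJoinPackage has exhausted the current key
      return res.returnStatus == POStatus.STATUS_EOP;
    }

Returning a boolean lets the JoinPackager branch keep draining a multi-tuple key, while the non-optimized branch can simply ignore the result.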
Example #2
    /**
     * The reduce function, which packages the key and List<Tuple> into key, Bag<Tuple>
     * after converting the Hadoop key type into a Pig type. The packaged result is collected
     * as-is if the reduce plan is empty, or after passing through the reduce plan otherwise.
     */
    @Override
    protected void reduce(PigNullableWritable key, Iterable<NullableTuple> tupIter, Context context)
        throws IOException, InterruptedException {

      if (!initialized) {
        initialized = true;

        // cache the collector for use in runPipeline()
        // which could additionally be called from close()
        this.outputCollector = context;
        pigReporter.setRep(context);
        PhysicalOperator.setReporter(pigReporter);

        boolean aggregateWarning =
            "true".equalsIgnoreCase(pigContext.getProperties().getProperty("aggregate.warning"));
        PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
        pigStatusReporter.setContext(new MRTaskContext(context));
        PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
        pigHadoopLogger.setReporter(pigStatusReporter);
        pigHadoopLogger.setAggregate(aggregateWarning);
        PhysicalOperator.setPigLogger(pigHadoopLogger);

        for (POStore store : stores) {
          MapReducePOStoreImpl impl = new MapReducePOStoreImpl(context);
          store.setStoreImpl(impl);
          store.setUp();
        }
      }

      // If the keyType is not a tuple, MapWithComparator.collect()
      // will have wrapped the key in a tuple so that the comparison
      // UDF used in the ORDER BY could process it. Unwrap the key
      // from the tuple and hand it to POPackage for processing.
      if (keyType != DataType.TUPLE) {
        Tuple t = (Tuple) key.getValueAsPigType();
        key = HDataType.getWritableComparableTypes(t.get(0), keyType);
      }

      pack.attachInput(key, tupIter.iterator());

      // pull the packaged (key, bag) tuple; the return status
      // drives how it is handled below
      Result res = pack.getNextTuple();
      if (res.returnStatus == POStatus.STATUS_OK) {
        Tuple packRes = (Tuple) res.result;

        if (rp.isEmpty()) {
          context.write(null, packRes);
          return;
        }

        rp.attachInput(packRes);

        List<PhysicalOperator> leaves = rp.getLeaves();

        PhysicalOperator leaf = leaves.get(0);
        runPipeline(leaf);
      }

      if (res.returnStatus == POStatus.STATUS_NULL) {
        return;
      }

      if (res.returnStatus == POStatus.STATUS_ERR) {
        int errCode = 2093;
        String msg = "Encountered error in package operator while processing group.";
        throw new ExecException(msg, errCode, PigException.BUG);
      }
    }
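
Example #2 finishes by handing the reduce plan's leaf to runPipeline(), which is also not shown. A minimal sketch, assuming it drains the leaf operator and forwards each tuple to the collector cached during initialization (outputCollector); the error code and message here are illustrative:

    // Sketch (assumed): drains the reduce plan's leaf operator, writing
    // each OK tuple to the cached collector until EOP is reached.
    protected void runPipeline(PhysicalOperator leaf)
        throws IOException, InterruptedException {
      while (true) {
        Result redRes = leaf.getNextTuple();
        if (redRes.returnStatus == POStatus.STATUS_OK) {
          try {
            outputCollector.write(null, (Tuple) redRes.result);
          } catch (Exception e) {
            throw new IOException(e);
          }
          continue;
        }
        if (redRes.returnStatus == POStatus.STATUS_EOP) {
          return; // this key's pipeline is exhausted
        }
        if (redRes.returnStatus == POStatus.STATUS_NULL) {
          continue; // skip empty results
        }
        if (redRes.returnStatus == POStatus.STATUS_ERR) {
          // error code and message assumed for illustration
          throw new ExecException(
              "Received Error while processing the reduce plan.",
              2090, PigException.BUG);
        }
      }
    }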