コード例 #1
0
    /**
     * Reduce step: hands the key and its grouped values to the package operator ({@code pack}) and
     * processes whatever it produces through {@code processOnePackageOutput}.
     *
     * <p>As described for this reducer, the Hadoop-typed key is converted to a Pig type and the
     * values are packaged into a {@code (key, Bag<Tuple>)} result, which is either collected as is
     * (when the reduce plan is empty) or after passing through the reduce plan.
     *
     * <p>The first call also performs one-time setup: it caches {@code context} as the output
     * collector, installs the reporter and logger on {@code PhysicalOperator}, and, unless running
     * in the illustrator, sets up every store in {@code stores}.
     *
     * @param key the grouping key for this reduce invocation
     * @param tupIter the values grouped under {@code key}
     * @param context the task context used for reporting and output
     * @throws IOException if setup or processing of the package output fails
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void reduce(PigNullableWritable key, Iterable<NullableTuple> tupIter, Context context)
        throws IOException, InterruptedException {

      if (!initialized) {
        initialized = true;

        // Hold on to the context: runPipeline() collects through it and
        // may additionally be invoked from close().
        this.outputCollector = context;
        pigReporter.setRep(context);
        PhysicalOperator.setReporter(pigReporter);

        // Wire up status reporting and (optionally aggregated) warnings.
        boolean aggregateWarning =
            Boolean.parseBoolean(pigContext.getProperties().getProperty("aggregate.warning"));
        PigStatusReporter statusReporter = PigStatusReporter.getInstance();
        statusReporter.setContext(new MRTaskContext(context));
        PigHadoopLogger hadoopLogger = PigHadoopLogger.getInstance();
        hadoopLogger.setReporter(statusReporter);
        hadoopLogger.setAggregate(aggregateWarning);
        PhysicalOperator.setPigLogger(hadoopLogger);

        if (!inIllustrator) {
          for (POStore store : stores) {
            store.setStoreImpl(new MapReducePOStoreImpl(context));
            store.setUp();
          }
        }
      }

      // Check the packager kind first, then attach the input; both paths
      // below consume the same attached input.
      boolean joinOptimized = pack.getPkgr() instanceof JoinPackager;
      pack.attachInput(key, tupIter.iterator());

      if (!joinOptimized) {
        // Join is not optimized, so the package yields only one tuple
        // for the key.
        processOnePackageOutput(context);
        return;
      }

      // With an optimized join, POPackage and POForEach are combined, so a
      // single key can yield many tuples. Keep draining until
      // processOnePackageOutput() reports that the end has been reached.
      boolean finished;
      do {
        finished = processOnePackageOutput(context);
      } while (!finished);
    }
コード例 #2
0
    /**
     * Invoked once all intermediate keys and values have been processed, which makes it the right
     * place to stop the reporter thread.
     *
     * <p>Unless an error occurred in reduce, this method optionally runs the pipeline one final
     * time (when {@code pig.stream.in.reduce} is {@code "true"}), tears down the stores, calls
     * finish on the UDFs of the reduce plan, and then clears the reporter and the
     * {@code initialized} flag.
     *
     * @param context the task context, used to set up stores that were not yet set up
     * @throws IOException if the pipeline, a store, or finishing the UDFs fails
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
      super.cleanup(context);

      // An error in reduce means there is nothing further to do here.
      if (errorInReduce) {
        return;
      }

      boolean streamInReduce =
          PigMapReduce.sJobConfInternal.get().get("pig.stream.in.reduce", "false").equals("true");
      if (streamInReduce) {
        // A stream in the pipeline may still have data to process. Flag
        // that all input has been sent and run the pipeline once more;
        // without a stream in the pipeline this results in nothing
        // happening.
        rp.endOfAllInput = true;
        runPipeline(leaf);
      }

      if (!inIllustrator) {
        for (POStore store : stores) {
          // Stores are set up here when the one-time initialization has
          // not happened (presumably because reduce() was never called).
          if (!initialized) {
            store.setStoreImpl(new MapReducePOStoreImpl(context));
            store.setUp();
          }
          store.tearDown();
        }
      }

      // Give every UDF in the reduce plan the chance to run EvalFunc.finish().
      DependencyOrderWalker<PhysicalOperator, PhysicalPlan> planWalker =
          new DependencyOrderWalker<PhysicalOperator, PhysicalPlan>(rp);
      UDFFinishVisitor udfFinisher = new UDFFinishVisitor(rp, planWalker);
      try {
        udfFinisher.visit();
      } catch (VisitorException e) {
        throw new IOException("Error trying to finish UDFs", e);
      }

      PhysicalOperator.setReporter(null);
      initialized = false;
    }
コード例 #3
0
    /**
     * Reduce step: unwraps the key when needed, hands the key and its grouped values to the
     * package operator ({@code pack}), and forwards the single packaged tuple.
     *
     * <p>As described for this reducer, the Hadoop-typed key is converted to a Pig type and the
     * values are packaged into a {@code (key, Bag<Tuple>)} result, which is either collected as is
     * (when the reduce plan is empty) or after passing through the reduce plan.
     *
     * <p>The first call also performs one-time setup: it caches {@code context} as the output
     * collector, installs the reporter and logger on {@code PhysicalOperator}, and sets up every
     * store in {@code stores}.
     *
     * @param key the grouping key; wrapped in a tuple when {@code keyType} is not a tuple
     * @param tupIter the values grouped under {@code key}
     * @param context the task context used for reporting and output
     * @throws IOException if unwrapping the key fails, or the package operator reports an error
     *     (raised as an {@code ExecException} with error code 2093)
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void reduce(PigNullableWritable key, Iterable<NullableTuple> tupIter, Context context)
        throws IOException, InterruptedException {

      if (!initialized) {
        initialized = true;

        // Hold on to the context: runPipeline() collects through it and
        // may additionally be invoked from close().
        this.outputCollector = context;
        pigReporter.setRep(context);
        PhysicalOperator.setReporter(pigReporter);

        // Wire up status reporting and (optionally aggregated) warnings.
        boolean aggregateWarning =
            Boolean.parseBoolean(pigContext.getProperties().getProperty("aggregate.warning"));
        PigStatusReporter statusReporter = PigStatusReporter.getInstance();
        statusReporter.setContext(new MRTaskContext(context));
        PigHadoopLogger hadoopLogger = PigHadoopLogger.getInstance();
        hadoopLogger.setReporter(statusReporter);
        hadoopLogger.setAggregate(aggregateWarning);
        PhysicalOperator.setPigLogger(hadoopLogger);

        for (POStore store : stores) {
          store.setStoreImpl(new MapReducePOStoreImpl(context));
          store.setUp();
        }
      }

      // For non-tuple key types, MapWithComparator.collect() would have
      // wrapped the key in a tuple so the order-by comparison UDF can
      // process it. Unwrap it again before handing it to the POPackage.
      if (keyType != DataType.TUPLE) {
        Tuple wrappedKey = (Tuple) key.getValueAsPigType();
        key = HDataType.getWritableComparableTypes(wrappedKey.get(0), keyType);
      }

      pack.attachInput(key, tupIter.iterator());
      Result packaged = pack.getNextTuple();

      if (packaged.returnStatus == POStatus.STATUS_OK) {
        Tuple packRes = (Tuple) packaged.result;

        // An empty reduce plan means the packaged tuple is the output.
        if (rp.isEmpty()) {
          context.write(null, packRes);
          return;
        }

        // Otherwise feed it to the reduce plan and run from its first leaf.
        rp.attachInput(packRes);
        runPipeline(rp.getLeaves().get(0));
      }

      // Nothing to emit for a null result.
      if (packaged.returnStatus == POStatus.STATUS_NULL) {
        return;
      }

      if (packaged.returnStatus == POStatus.STATUS_ERR) {
        throw new ExecException(
            "Encountered error in package operator while processing group.",
            2093,
            PigException.BUG);
      }
    }