public void initialize(TezOutputContext outputContext, Configuration conf, int numOutputs)
      throws IOException {
    this.outputContext = outputContext;
    this.conf = conf;
    this.partitions = numOutputs;

    rfs = ((LocalFileSystem) FileSystem.getLocal(this.conf)).getRaw();

    // sorter
    sorter =
        ReflectionUtils.newInstance(
            this.conf.getClass(
                TezJobConfig.TEZ_RUNTIME_INTERNAL_SORTER_CLASS,
                QuickSort.class,
                IndexedSorter.class),
            this.conf);

    comparator = ConfigUtils.getIntermediateOutputKeyComparator(this.conf);

    // k/v serialization
    keyClass = ConfigUtils.getIntermediateOutputKeyClass(this.conf);
    valClass = ConfigUtils.getIntermediateOutputValueClass(this.conf);
    serializationFactory = new SerializationFactory(this.conf);
    keySerializer = serializationFactory.getSerializer(keyClass);
    valSerializer = serializationFactory.getSerializer(valClass);

    //    counters
    mapOutputByteCounter = outputContext.getCounters().findCounter(TaskCounter.MAP_OUTPUT_BYTES);
    mapOutputRecordCounter =
        outputContext.getCounters().findCounter(TaskCounter.MAP_OUTPUT_RECORDS);
    fileOutputByteCounter =
        outputContext.getCounters().findCounter(TaskCounter.MAP_OUTPUT_MATERIALIZED_BYTES);
    spilledRecordsCounter = outputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS);
    // compression
    if (ConfigUtils.shouldCompressIntermediateOutput(this.conf)) {
      Class<? extends CompressionCodec> codecClass =
          ConfigUtils.getIntermediateOutputCompressorClass(this.conf, DefaultCodec.class);
      codec = ReflectionUtils.newInstance(codecClass, this.conf);
    } else {
      codec = null;
    }

    // Task outputs
    mapOutputFile = TezRuntimeUtils.instantiateTaskOutputManager(conf, outputContext);

    LOG.info(
        "Instantiating Partitioner: ["
            + conf.get(TezJobConfig.TEZ_RUNTIME_PARTITIONER_CLASS)
            + "]");
    this.conf.setInt(TezJobConfig.TEZ_RUNTIME_NUM_EXPECTED_PARTITIONS, this.partitions);
    this.partitioner = TezRuntimeUtils.instantiatePartitioner(this.conf);
    this.combiner = TezRuntimeUtils.instantiateCombiner(this.conf, outputContext);
  }
Ejemplo n.º 2
0
  @Override
  public void fetchFailed(
      String host, InputAttemptIdentifier srcAttemptIdentifier, boolean connectFailed) {
    // TODO NEWTEZ. Implement logic to report fetch failures after a threshold.
    // For now, reporting immediately.
    LOG.info(
        "Fetch failed for src: "
            + srcAttemptIdentifier
            + "InputIdentifier: "
            + srcAttemptIdentifier
            + ", connectFailed: "
            + connectFailed);
    failedShufflesCounter.increment(1);
    if (srcAttemptIdentifier == null) {
      String message = "Received fetchFailure for an unknown src (null)";
      LOG.fatal(message);
      inputContext.fatalError(null, message);
    } else {
      InputReadErrorEvent readError =
          new InputReadErrorEvent(
              "Fetch failure while fetching from "
                  + TezRuntimeUtils.getTaskAttemptIdentifier(
                      inputContext.getSourceVertexName(),
                      srcAttemptIdentifier.getInputIdentifier().getInputIndex(),
                      srcAttemptIdentifier.getAttemptNumber()),
              srcAttemptIdentifier.getInputIdentifier().getInputIndex(),
              srcAttemptIdentifier.getAttemptNumber());

      List<Event> failedEvents = Lists.newArrayListWithCapacity(1);
      failedEvents.add(readError);
      inputContext.sendEvents(failedEvents);
    }
  }