Example #1
Constructor of the map-side sort buffer (MapOutputBuffer): it validates the io.sort.* settings, sizes the in-memory key/value buffer and the record accounting arrays, and wires up the partitioner, key/value serialization, counters, optional map-output compression, and the combiner.
 @SuppressWarnings("unchecked")
 public MapOutputBuffer(TaskUmbilicalProtocol umbilical, JobConf job, Reporter reporter)
     throws IOException {
   this.job = job;
   this.reporter = reporter;
   localFs = FileSystem.getLocal(job);
   partitions = job.getNumReduceTasks();
   partitioner = (Partitioner) ReflectionUtils.newInstance(job.getPartitionerClass(), job);
   // sanity checks
   final float spillper = job.getFloat("io.sort.spill.percent", (float) 0.8);
   final float recper = job.getFloat("io.sort.record.percent", (float) 0.05);
   final int sortmb = job.getInt("io.sort.mb", 100);
   if (spillper > (float) 1.0 || spillper < (float) 0.0) {
     throw new IOException("Invalid \"io.sort.spill.percent\": " + spillper);
   }
   if (recper > (float) 1.0 || recper < (float) 0.01) {
     throw new IOException("Invalid \"io.sort.record.percent\": " + recper);
   }
   // io.sort.mb must fit in 11 bits (0..2047) so that "sortmb << 20" below cannot overflow an int
   if ((sortmb & 0x7FF) != sortmb) {
     throw new IOException("Invalid \"io.sort.mb\": " + sortmb);
   }
   sorter =
       (IndexedSorter)
           ReflectionUtils.newInstance(job.getClass("map.sort.class", QuickSort.class), job);
   LOG.info("io.sort.mb = " + sortmb);
   // buffers and accounting: "io.sort.record.percent" of the sort space holds
   // per-record accounting entries; the remainder holds serialized key/value bytes
   int maxMemUsage = sortmb << 20;
   int recordCapacity = (int) (maxMemUsage * recper);
   recordCapacity -= recordCapacity % RECSIZE; // round down to whole accounting records
   kvbuffer = new byte[maxMemUsage - recordCapacity];
   bufvoid = kvbuffer.length;
   recordCapacity /= RECSIZE; // bytes -> number of records that can be tracked
   kvoffsets = new int[recordCapacity];
   kvindices = new int[recordCapacity * ACCTSIZE];
   // spill once either buffer crosses "io.sort.spill.percent" of its capacity
   softBufferLimit = (int) (kvbuffer.length * spillper);
   softRecordLimit = (int) (kvoffsets.length * spillper);
   LOG.info("data buffer = " + softBufferLimit + "/" + kvbuffer.length);
   LOG.info("record buffer = " + softRecordLimit + "/" + kvoffsets.length);
   // k/v serialization
   comparator = job.getOutputKeyComparator();
   keyClass = (Class<K>) job.getMapOutputKeyClass();
   valClass = (Class<V>) job.getMapOutputValueClass();
   serializationFactory = new SerializationFactory(job);
   keySerializer = serializationFactory.getSerializer(keyClass);
   keySerializer.open(bb);
   valSerializer = serializationFactory.getSerializer(valClass);
   valSerializer.open(bb);
   // counters
   Counters counters = getCounters();
   mapOutputByteCounter = counters.findCounter(MAP_OUTPUT_BYTES);
   mapOutputRecordCounter = counters.findCounter(MAP_OUTPUT_RECORDS);
   combineInputCounter = counters.findCounter(COMBINE_INPUT_RECORDS);
   combineOutputCounter = counters.findCounter(COMBINE_OUTPUT_RECORDS);
   // compression
   if (job.getCompressMapOutput()) {
     Class<? extends CompressionCodec> codecClass =
         job.getMapOutputCompressorClass(DefaultCodec.class);
     codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, job);
   }
   // combiner
   combinerClass = job.getCombinerClass();
   combineCollector =
       (null != combinerClass) ? new CombineOutputCollector(combineOutputCounter) : null;
   minSpillsForCombine = job.getInt("min.num.spills.for.combine", 3);
 }
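
The constructor above only reads and validates these settings. For context, here is a minimal, hypothetical sketch of how a client job might set them through the old mapred API; the class name SortBufferConfig and the concrete values are illustrative assumptions, not part of the example.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapred.JobConf;

public class SortBufferConfig {
  // Illustrative values only; io.sort.mb must stay below 2048 (see the 0x7FF check above).
  public static JobConf configure(JobConf job) {
    job.setInt("io.sort.mb", 200);
    job.setFloat("io.sort.spill.percent", 0.8f);   // spill threshold for both buffers
    job.setFloat("io.sort.record.percent", 0.05f); // share of io.sort.mb used for record accounting
    job.setInt("min.num.spills.for.combine", 3);
    job.setCompressMapOutput(true);                // enables the codec branch in the constructor
    job.setMapOutputCompressorClass(DefaultCodec.class);
    job.setMapOutputKeyClass(Text.class);          // read back as keyClass
    job.setMapOutputValueClass(IntWritable.class); // read back as valClass
    return job;
  }
}
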
Example #2
The run() method of a thread that executes an entire MapReduce job in-process (local mode): it creates the output committer, runs the map tasks on an executor and waits for them, moves each map output into place as reduce input, runs the single local reduce task, then commits the job (or aborts it on failure) and cleans up the submit directory and the distributed cache.
    @SuppressWarnings("unchecked")
    @Override
    public void run() {
      JobID jobId = profile.getJobID();
      JobContext jContext = new JobContextImpl(conf, jobId);
      org.apache.hadoop.mapreduce.OutputCommitter outputCommitter = null;
      try {
        outputCommitter = createOutputCommitter(conf.getUseNewMapper(), jobId, conf);
      } catch (Exception e) {
        LOG.info("Failed to createOutputCommitter", e);
        return;
      }

      try {
        TaskSplitMetaInfo[] taskSplitMetaInfos =
            SplitMetaInfoReader.readSplitMetaInfo(jobId, localFs, conf, systemJobDir);
        int numReduceTasks = job.getNumReduceTasks();
        if (numReduceTasks > 1 || numReduceTasks < 0) {
          // we only allow 0 or 1 reducer in local mode
          numReduceTasks = 1;
          job.setNumReduceTasks(1);
        }
        outputCommitter.setupJob(jContext);
        status.setSetupProgress(1.0f);

        Map<TaskAttemptID, MapOutputFile> mapOutputFiles =
            Collections.synchronizedMap(new HashMap<TaskAttemptID, MapOutputFile>());

        List<MapTaskRunnable> taskRunnables =
            getMapTaskRunnables(taskSplitMetaInfos, jobId, mapOutputFiles);

        ExecutorService mapService = createMapExecutor(taskRunnables.size());
        // Start populating the executor with work units.
        // They may begin running immediately (in other threads).
        for (Runnable r : taskRunnables) {
          mapService.submit(r);
        }

        try {
          mapService.shutdown(); // Instructs queue to drain.

          // Wait for tasks to finish; do not use a time-based timeout.
          // (See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6179024)
          LOG.info("Waiting for map tasks");
          mapService.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        } catch (InterruptedException ie) {
          // Cancel all threads.
          mapService.shutdownNow();
          throw ie;
        }

        LOG.info("Map task executor complete.");

        // After waiting for the map tasks to complete, if any of these
        // have thrown an exception, rethrow it now in the main thread context.
        for (MapTaskRunnable r : taskRunnables) {
          if (r.storedException != null) {
            throw new Exception(r.storedException);
          }
        }

        TaskAttemptID reduceId = new TaskAttemptID(new TaskID(jobId, false, 0), 0);
        try {
          if (numReduceTasks > 0) {
            ReduceTask reduce =
                new ReduceTask(systemJobFile.toString(), reduceId, 0, mapIds.size(), 1);
            reduce.setUser(UserGroupInformation.getCurrentUser().getShortUserName());
            JobConf localConf = new JobConf(job);
            localConf.set("mapreduce.jobtracker.address", "local");
            TaskRunner.setupChildMapredLocalDirs(reduce, localConf);
            // move map output to reduce input
            for (int i = 0; i < mapIds.size(); i++) {
              if (!this.isInterrupted()) {
                TaskAttemptID mapId = mapIds.get(i);
                Path mapOut = mapOutputFiles.get(mapId).getOutputFile();
                MapOutputFile localOutputFile = new MapOutputFile();
                localOutputFile.setConf(localConf);
                Path reduceIn =
                    localOutputFile.getInputFileForWrite(
                        mapId.getTaskID(), localFs.getFileStatus(mapOut).getLen());
                if (!localFs.mkdirs(reduceIn.getParent())) {
                  throw new IOException(
                      "Mkdirs failed to create " + reduceIn.getParent().toString());
                }
                if (!localFs.rename(mapOut, reduceIn)) {
                  throw new IOException("Couldn't rename " + mapOut);
                }
              } else {
                throw new InterruptedException();
              }
            }
            if (!this.isInterrupted()) {
              reduce.setJobFile(localJobFile.toString());
              localConf.setUser(reduce.getUser());
              reduce.localizeConfiguration(localConf);
              reduce.setConf(localConf);
              reduce_tasks += 1;
              myMetrics.launchReduce(reduce.getTaskID());
              reduce.run(localConf, this);
              myMetrics.completeReduce(reduce.getTaskID());
              reduce_tasks -= 1;
            } else {
              throw new InterruptedException();
            }
          }
        } finally {
          for (MapOutputFile output : mapOutputFiles.values()) {
            output.removeAll();
          }
        }
        // delete the temporary directory in output directory
        outputCommitter.commitJob(jContext);
        status.setCleanupProgress(1.0f);

        if (killed) {
          this.status.setRunState(JobStatus.KILLED);
        } else {
          this.status.setRunState(JobStatus.SUCCEEDED);
        }

        JobEndNotifier.localRunnerNotification(job, status);

      } catch (Throwable t) {
        try {
          outputCommitter.abortJob(jContext, org.apache.hadoop.mapreduce.JobStatus.State.FAILED);
        } catch (IOException ioe) {
          LOG.info("Error cleaning up job:" + id);
        }
        status.setCleanupProgress(1.0f);
        if (killed) {
          this.status.setRunState(JobStatus.KILLED);
        } else {
          this.status.setRunState(JobStatus.FAILED);
        }
        LOG.warn(id, t);

        JobEndNotifier.localRunnerNotification(job, status);

      } finally {
        try {
          fs.delete(systemJobFile.getParent(), true); // delete submit dir
          localFs.delete(localJobFile, true); // delete local copy
          // Cleanup distributed cache
          taskDistributedCacheManager.release();
          trackerDistributedCacheManager.purgeCache();
        } catch (IOException e) {
          LOG.warn("Error cleaning up " + id + ": " + e);
        }
      }
    }
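
This run() method executes the whole job in-process; a submission reaches it when the job tracker address is configured as "local" (the code above sets "mapreduce.jobtracker.address" to "local" for the reduce's localized configuration). Below is a minimal sketch of such a local submission using the old mapred API, assuming default (identity) map and reduce over text input; the class name LocalModeSubmit and the argument paths are placeholders.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class LocalModeSubmit {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(LocalModeSubmit.class);
    conf.setJobName("local-example");
    conf.set("mapreduce.jobtracker.address", "local"); // select the in-process local runner
    conf.setNumReduceTasks(1);                         // local mode allows at most one reducer
    // Defaults (identity mapper/reducer over text input) emit <LongWritable, Text> pairs.
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    // Blocks until the locally executed job completes or fails.
    JobClient.runJob(conf);
  }
}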