/**
   * Configures the partitioner for generating HFiles.
   *
   * <p>Each generated HFile should fit within a region of the target table. Additionally, it's
   * optimal to have only one HFile to load into each region, since a read from that region will
   * require reading from each HFile under management (until compaction happens and merges them all
   * back into one HFile).
   *
   * <p>To achieve this, we configure a TotalOrderPartitioner that will partition the records output
   * from the Mapper based on their rank in a total ordering of the keys. The <code>startKeys</code>
   * argument should contain a list of the first key in each of those partitions.
   *
   * @param job The job to configure.
   * @param startKeys A list of keys that will mark the boundaries between the partitions for the
   *     sorted map output records.
   * @throws IOException If there is an error.
   */
  private static void configurePartitioner(Job job, List<HFileKeyValue> startKeys)
      throws IOException {
    job.setPartitionerClass(TotalOrderPartitioner.class);

    LOG.info("Configuring " + startKeys.size() + " reduce partitions.");
    job.setNumReduceTasks(startKeys.size());

    // Write the file that the TotalOrderPartitioner reads to determine where to partition records.
    Path partitionFilePath =
        new Path(job.getWorkingDirectory(), "partitions_" + System.currentTimeMillis());
    LOG.info("Writing partition information to " + partitionFilePath);

    final FileSystem fs = partitionFilePath.getFileSystem(job.getConfiguration());
    partitionFilePath = partitionFilePath.makeQualified(fs);
    writePartitionFile(job.getConfiguration(), partitionFilePath, startKeys);

    // Add it to the distributed cache.
    try {
      final URI cacheUri =
          new URI(partitionFilePath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
      DistributedCache.addCacheFile(cacheUri, job.getConfiguration());
    } catch (URISyntaxException e) {
      throw new IOException(e);
    }
    DistributedCache.createSymlink(job.getConfiguration());
  }
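
  // writePartitionFile(...) is called above but not shown here. A minimal sketch of what it might
  // look like, assuming HFileKeyValue is the job's Writable map output key class and that
  // TotalOrderPartitioner reads its split points from a SequenceFile of keys (uses
  // org.apache.hadoop.io.SequenceFile and org.apache.hadoop.io.NullWritable):
  private static void writePartitionFile(
      Configuration conf, Path path, List<HFileKeyValue> startKeys) throws IOException {
    final SequenceFile.Writer writer = SequenceFile.createWriter(
        path.getFileSystem(conf), conf, path, HFileKeyValue.class, NullWritable.class);
    try {
      for (HFileKeyValue startKey : startKeys) {
        // Each appended key marks the first key of one reduce partition.
        writer.append(startKey, NullWritable.get());
      }
    } finally {
      writer.close();
    }
  }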
  // jobSubmitDir is the staging directory under /tmp; copy libraries, files, and other resources into it.
  private void copyAndConfigureFiles(Job job, Path jobSubmitDir) throws IOException {
    Configuration conf = job.getConfiguration();
    short replication = (short) conf.getInt(Job.SUBMIT_REPLICATION, 10);
    copyAndConfigureFiles(job, jobSubmitDir, replication);

    // Set the working directory
    if (job.getWorkingDirectory() == null) {
      job.setWorkingDirectory(jtFs.getWorkingDirectory());
    }
  }
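
  // A hypothetical caller-side sketch (not part of the original snippet): the staging replication
  // read above via Job.SUBMIT_REPLICATION (default 10) can be lowered through the job configuration
  // before submission, e.g. on a small cluster; the job name below is illustrative.
  private static Job newJobWithLowStagingReplication(Configuration conf) throws IOException {
    conf.setInt(Job.SUBMIT_REPLICATION, 3); // replicate staged job files 3x instead of the default 10x
    return Job.getInstance(conf, "example-job");
  }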
  public static void cleanup(Job job) throws IOException {
    final Path jobDir = getJobPath(job.getJobID(), job.getWorkingDirectory());
    final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
    RuntimeException e = null;
    try {
      JobHelper.deleteWithRetry(fs, jobDir, true);
    } catch (RuntimeException ex) {
      e = ex;
    }
    try {
      JobHelper.deleteWithRetry(
          fs, getJobClassPathDir(job.getJobName(), job.getWorkingDirectory()), true);
    } catch (RuntimeException ex) {
      if (e == null) {
        e = ex;
      } else {
        e.addSuppressed(ex);
      }
    }
    if (e != null) {
      throw e;
    }
  }
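
  // JobHelper.deleteWithRetry(...) is used above but not shown. A minimal sketch of a retrying
  // recursive delete, assuming a fixed retry budget (the real helper may retry or back off differently):
  static void deleteWithRetry(final FileSystem fs, final Path path, final boolean recursive) {
    final int maxAttempts = 3; // hypothetical retry budget
    for (int attempt = 1; ; attempt++) {
      try {
        fs.delete(path, recursive); // returns false if the path is already gone; treated as success here
        return;
      } catch (IOException e) {
        if (attempt >= maxAttempts) {
          // Matches the RuntimeException handling in cleanup(...) above.
          throw new RuntimeException("Failed to delete " + path, e);
        }
      }
    }
  }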
  /** Hadoop {@link Tool} implementation */
  @Override
  public int run(String[] args) throws Exception {

    Options options = new Options();

    configureOptions(options);

    CommandLineParser parser = new GnuParser();

    try {
      CommandLine commandLine = parser.parse(options, args);

      if (commandLine.hasOption(VERBOSE)) {
        Logger.getGlobal().setLevel(Level.FINEST);
      }

      if (commandLine.hasOption(QUIET)) {
        Logger.getGlobal().setLevel(Level.OFF);
      }

      String transformationLocation = commandLine.getOptionValue(TRANSFORMATION);
      String sourcemmLocation = commandLine.getOptionValue(SOURCE_PACKAGE);
      String targetmmLocation = commandLine.getOptionValue(TARGET_PACKAGE);
      String recordsLocation = commandLine.getOptionValue(RECORDS_FILE);
      String inputLocation = commandLine.getOptionValue(INPUT_MODEL);
      String outputLocation =
          commandLine.getOptionValue(
              OUTPUT_MODEL, new Path(inputLocation).suffix(".out.xmi").toString());

      int recommendedMappers = 1;
      if (commandLine.hasOption(RECOMMENDED_MAPPERS)) {
        recommendedMappers =
            ((Number) commandLine.getParsedOptionValue(RECOMMENDED_MAPPERS)).intValue();
      }

      Configuration conf = this.getConf();
      Job job = Job.getInstance(conf, JOB_NAME);

      // Configure classes
      job.setJarByClass(ATLMRMaster.class);
      job.setMapperClass(ATLMRMapper.class);
      job.setReducerClass(ATLMRReducer.class);
      job.setInputFormatClass(NLineInputFormat.class);
      job.setOutputFormatClass(SequenceFileOutputFormat.class);
      job.setMapOutputKeyClass(LongWritable.class);
      job.setMapOutputValueClass(Text.class);
      job.setNumReduceTasks(1);

      // Configure MapReduce input/outputs
      Path recordsPath = new Path(recordsLocation);
      FileInputFormat.setInputPaths(job, recordsPath);
      String timestamp = new SimpleDateFormat("yyyyMMddHHmm").format(new Date());
      String outDirName = "atlmr-out-" + timestamp + "-" + UUID.randomUUID();
      FileOutputFormat.setOutputPath(
          job, new Path(job.getWorkingDirectory().suffix(Path.SEPARATOR + outDirName).toUri()));

      // Configure records per map
      FileSystem fileSystem = FileSystem.get(recordsPath.toUri(), conf);
      InputStream inputStream = fileSystem.open(recordsPath);
      long linesPerMap =
          (long) Math.ceil((double) countLines(inputStream) / (double) recommendedMappers);
      job.getConfiguration().setLong(NLineInputFormat.LINES_PER_MAP, linesPerMap);

      // Configure ATL related inputs/outputs
      job.getConfiguration().set(TRANSFORMATION, transformationLocation);
      job.getConfiguration().set(SOURCE_PACKAGE, sourcemmLocation);
      job.getConfiguration().set(TARGET_PACKAGE, targetmmLocation);
      job.getConfiguration().set(INPUT_MODEL, inputLocation);
      job.getConfiguration().set(OUTPUT_MODEL, outputLocation);

      Logger.getGlobal().log(Level.INFO, "Starting Job execution");
      long begin = System.currentTimeMillis();
      int returnValue = job.waitForCompletion(true) ? STATUS_OK : STATUS_ERROR;
      long end = System.currentTimeMillis();
      Logger.getGlobal()
          .log(
              Level.INFO,
              MessageFormat.format(
                  "Job execution ended in {0}s with status code {1}",
                  (end - begin) / 1000, returnValue));

      return returnValue;

    } catch (ParseException e) {
      System.err.println(e.getLocalizedMessage());
      HelpFormatter formatter = new HelpFormatter();
      formatter.setOptionComparator(new OptionComarator<>());
      try {
        formatter.setWidth(Math.max(Terminal.getTerminal().getTerminalWidth(), 80));
      } catch (Throwable t) {
        // Nothing to do...
      }
      formatter.printHelp("yarn jar <this-file.jar>", options, true);
      return STATUS_ERROR;
    }
  }
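
  // countLines(InputStream) is used above to split the records file evenly across the recommended
  // mappers, but its body is not shown. A minimal sketch, assuming the records file is plain text
  // with one record per line (uses java.io.BufferedReader, java.io.InputStreamReader, and
  // java.nio.charset.StandardCharsets):
  private static long countLines(InputStream inputStream) throws IOException {
    try (BufferedReader reader =
        new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
      long lines = 0;
      while (reader.readLine() != null) {
        lines++;
      }
      return lines;
    }
  }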
  public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
      jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
      throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
      jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(
        JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
        JobHelper.distributedClassPath(
            getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
        job);

    Throwable throwable = null;
    try {
      job.submit();
      log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
      final boolean success = job.waitForCompletion(true);
      if (!success) {
        final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
        if (reports != null) {
          for (final TaskReport report : reports) {
            log.error(
                "Error in task [%s] : %s",
                report.getTaskId(), Arrays.toString(report.getDiagnostics()));
          }
        }
        return null;
      }
      try {
        loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
        writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
      } catch (IOException ex) {
        log.error(ex, "Could not fetch counters");
      }
      final JobID jobID = job.getJobID();

      final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
      final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
      final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
      final List<Path> goodPaths = new ArrayList<>();
      while (it.hasNext()) {
        final LocatedFileStatus locatedFileStatus = it.next();
        if (locatedFileStatus.isFile()) {
          final Path myPath = locatedFileStatus.getPath();
          if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
            goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
          }
        }
      }
      if (goodPaths.isEmpty()) {
        log.warn("No good data found at [%s]", jobDir);
        return null;
      }
      final List<DataSegment> returnList =
          ImmutableList.copyOf(
              Lists.transform(
                  goodPaths,
                  new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                      try {
                        if (!fs.exists(input)) {
                          throw new ISE(
                              "Somehow [%s] was found but [%s] is missing at [%s]",
                              ConvertingOutputFormat.DATA_SUCCESS_KEY,
                              ConvertingOutputFormat.DATA_FILE_KEY,
                              jobDir);
                        }
                      } catch (final IOException e) {
                        throw Throwables.propagate(e);
                      }
                      try (final InputStream stream = fs.open(input)) {
                        return HadoopDruidConverterConfig.jsonMapper.readValue(
                            stream, DataSegment.class);
                      } catch (final IOException e) {
                        throw Throwables.propagate(e);
                      }
                    }
                  }));
      if (returnList.size() == segments.size()) {
        return returnList;
      } else {
        throw new ISE(
            "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
            segments.size(), returnList.size(), jobDir);
      }
    } catch (InterruptedException | ClassNotFoundException e) {
      RuntimeException exception = Throwables.propagate(e);
      throwable = exception;
      throw exception;
    } catch (Throwable t) {
      throwable = t;
      throw t;
    } finally {
      try {
        cleanup(job);
      } catch (IOException e) {
        if (throwable != null) {
          throwable.addSuppressed(e);
        } else {
          log.error(e, "Could not clean up job [%s]", job.getJobID());
        }
      }
    }
  }
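
  // A hypothetical usage sketch of the contract above: run() returns null when the Hadoop job (or
  // its output) fails, throws ISE when the number of converted segments does not match the input,
  // and otherwise returns the converted segments read back from the job's working directory. The
  // HadoopConverterJob class name, its constructor, and the config type are assumed from context.
  public static List<DataSegment> runConversion(final HadoopDruidConverterConfig converterConfig)
      throws IOException {
    final HadoopConverterJob converterJob = new HadoopConverterJob(converterConfig); // constructor assumed
    final List<DataSegment> converted = converterJob.run();
    if (converted == null) {
      // run() already logged per-task diagnostics; surface the failure to the caller.
      throw new ISE("Segment conversion job failed for datasource [%s]", converterConfig.getDataSource());
    }
    return converted;
  }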