Example #1
0
 /**
  * Loads the Hadoop configuration files into a new {@link JobConf}.
  *
  * <p>The job is named {@code "HdfsDAO"} and the core, HDFS and MapReduce site files are added as
  * classpath resources located under {@code hadoop/}.
  *
  * @return a new JobConf with the three site resources registered
  */
 public static JobConf getConfig() {
   JobConf conf = new JobConf(HDFSDao.class);
   conf.setJobName("HdfsDAO");
   // addResource(String) looks the name up through the class loader, so it has to be a plain
   // classpath-relative path. A Spring-style "classpath:/" prefix is not interpreted by Hadoop;
   // with it the resource is never found and the site settings are not applied.
   conf.addResource("hadoop/core-site.xml");
   conf.addResource("hadoop/hdfs-site.xml");
   conf.addResource("hadoop/mapred-site.xml");
   return conf;
 }
 /**
  * Creates a builder backed by a new {@link JobConf} derived from the given configuration.
  *
  * @param jobName name assigned to the job
  * @param conf configuration the JobConf is constructed from
  */
 public JobBuilder(String jobName, Configuration conf) {
   _jobConf = new JobConf(conf);
   // legacy resource files; added in this order (default first, then site)
   for (String legacyResource : new String[] {"nutch-default.xml", "nutch-site.xml"}) {
     _jobConf.addResource(legacyResource);
   }
   // builder defaults
   _jobConf.setJobName(jobName);
   _jobConf.setNumTasksToExecutePerJvm(1000);
 }
Example #3
0
  /**
   * Command-line entry point: reads a serialized plan from a local file and executes it in this
   * JVM.
   *
   * <p>Recognized options (anything else is ignored):
   *
   * <ul>
   *   <li>{@code -plan FILE} - local path of the serialized plan (required)
   *   <li>{@code -jobconffile FILE} - extra configuration resource added to the JobConf
   *   <li>{@code -nolog} - attach a NullAppender instead of calling {@code setupChildLog4j}
   *   <li>{@code -files LIST} - value stored under the {@code tmpfiles} configuration key
   *   <li>{@code -localtask} - treat the plan as a {@code MapredLocalWork} and run it through
   *       {@code MapredLocalTask}; otherwise it is a {@code MapredWork} run by {@code ExecDriver}
   * </ul>
   *
   * <p>If the executed task returns a non-zero code, the JVM exits with that code.
   *
   * @param args command-line arguments as described above
   * @throws IOException if the plan file cannot be opened or closed
   * @throws HiveException if extending the class loader with the auxiliary/added jars fails
   *     (callees may raise it as well)
   */
  @SuppressWarnings("unchecked")
  public static void main(String[] args) throws IOException, HiveException {

    String planFileName = null;
    String jobConfFileName = null;
    boolean noLog = false;
    String files = null;
    boolean localtask = false;
    try {
      for (int i = 0; i < args.length; i++) {
        if (args[i].equals("-plan")) {
          planFileName = args[++i];
        } else if (args[i].equals("-jobconffile")) {
          jobConfFileName = args[++i];
        } else if (args[i].equals("-nolog")) {
          noLog = true;
        } else if (args[i].equals("-files")) {
          files = args[++i];
        } else if (args[i].equals("-localtask")) {
          localtask = true;
        }
      }
    } catch (IndexOutOfBoundsException e) {
      // an option that takes a value was the last argument
      System.err.println("Missing argument to option");
      printUsage();
    }

    // The class handed to JobConf depends on which kind of task is being run.
    JobConf conf;
    if (localtask) {
      conf = new JobConf(MapredLocalTask.class);
    } else {
      conf = new JobConf(ExecDriver.class);
    }

    if (jobConfFileName != null) {
      conf.addResource(new Path(jobConfFileName));
    }

    if (files != null) {
      conf.set("tmpfiles", files);
    }

    // With security enabled, pass the token file location (if set in the environment) to the job.
    if (UserGroupInformation.isSecurityEnabled()) {
      String hadoopAuthToken = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
      if (hadoopAuthToken != null) {
        conf.set("mapreduce.job.credentials.binary", hadoopAuthToken);
      }
    }

    boolean isSilent = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESESSIONSILENT);

    // Publish the query id as a system property, falling back to a timestamp-based placeholder.
    String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID, "").trim();
    if (queryId.isEmpty()) {
      queryId = "unknown-" + System.currentTimeMillis();
    }
    System.setProperty(HiveConf.ConfVars.HIVEQUERYID.toString(), queryId);

    if (noLog) {
      // If started from main(), and noLog is on, we should not output
      // any logs. To turn the log on, please set -Dtest.silent=false
      org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getRootLogger();
      NullAppender appender = NullAppender.createNullAppender();
      appender.addToLogger(logger.getName(), Level.ERROR);
      appender.start();
    } else {
      setupChildLog4j(conf);
    }

    Logger LOG = LoggerFactory.getLogger(ExecDriver.class.getName());
    LogHelper console = new LogHelper(LOG, isSilent);

    if (planFileName == null) {
      console.printError("Must specify Plan File Name");
      // NOTE(review): execution continues past this point unless printUsage() terminates the
      // JVM; printUsage() is defined elsewhere - confirm it exits.
      printUsage();
    }

    // print out the location of the log file for the user so
    // that it's easy to find reason for local mode execution failures
    for (Appender appender :
        ((org.apache.logging.log4j.core.Logger) LogManager.getRootLogger())
            .getAppenders()
            .values()) {
      if (appender instanceof FileAppender) {
        console.printInfo("Execution log at: " + ((FileAppender) appender).getFileName());
      } else if (appender instanceof RollingFileAppender) {
        console.printInfo("Execution log at: " + ((RollingFileAppender) appender).getFileName());
      }
    }

    // the plan file should always be in local directory
    Path p = new Path(planFileName);
    FileSystem fs = FileSystem.getLocal(conf);

    int ret;
    // try-with-resources so the plan stream is closed on every path out of this block
    // (previously it was opened and never closed).
    try (InputStream pathData = fs.open(p)) {

      // this is workaround for hadoop-17 - libjars are not added to classpath of the
      // child process. so we add it here explicitly

      String auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS);
      String addedJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEADDEDJARS);
      try {
        // see also - code in CliDriver.java
        ClassLoader loader = conf.getClassLoader();
        if (StringUtils.isNotBlank(auxJars)) {
          loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","));
        }
        if (StringUtils.isNotBlank(addedJars)) {
          loader = Utilities.addToClassPath(loader, StringUtils.split(addedJars, ","));
        }
        conf.setClassLoader(loader);
        // Also set this to the Thread ContextClassLoader, so new threads will
        // inherit this class loader, and propagate into newly created
        // Configurations by those new threads.
        Thread.currentThread().setContextClassLoader(loader);
      } catch (Exception e) {
        throw new HiveException(e.getMessage(), e);
      }

      if (localtask) {
        memoryMXBean = ManagementFactory.getMemoryMXBean();
        MapredLocalWork plan = Utilities.deserializePlan(pathData, MapredLocalWork.class, conf);
        MapredLocalTask ed = new MapredLocalTask(plan, conf, isSilent);
        ret = ed.executeInProcess(new DriverContext());

      } else {
        MapredWork plan = Utilities.deserializePlan(pathData, MapredWork.class, conf);
        ExecDriver ed = new ExecDriver(plan, conf, isSilent);
        ret = ed.execute(new DriverContext());
      }
    }

    // Only a failing task forces an explicit exit code; success returns normally.
    if (ret != 0) {
      System.exit(ret);
    }
  }
Example #4
0
  /**
   * Builds the {@link JobConf} for an Aggregate based map/reduce job from command-line arguments.
   *
   * <p>Generic Hadoop options are consumed first by {@link GenericOptionsParser}. The remaining
   * positional arguments are {@code inputDirs outDir [numOfReducer [textinputformat|seq [specfile
   * [jobName]]]]}. With fewer than two of them, usage is printed and {@code System.exit(1)} is
   * called.
   *
   * <p>Input format selection: {@code TextInputFormat} is used only when the fourth positional
   * argument equals {@code "textinputformat"} ignoring case. In every other case, including when
   * that argument is absent, {@code SequenceFileInputFormat} is used.
   *
   * @param args the arguments used for job creation. Generic hadoop arguments are accepted.
   * @return a JobConf object ready for submission.
   * @throws IOException propagated from the underlying Hadoop calls
   * @see GenericOptionsParser
   */
  public static JobConf createValueAggregatorJob(String args[]) throws IOException {
    Configuration conf = new Configuration();

    // Strip the generic Hadoop options; only the positional arguments remain afterwards.
    args = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (args.length < 2) {
      System.out.println(
          "usage: inputDirs outDir [numOfReducer [textinputformat|seq [specfile [jobName]]]]");
      GenericOptionsParser.printGenericCommandUsage(System.out);
      System.exit(1);
    }

    // Positional arguments, in order; each optional one falls back to its default when absent.
    final String inputDir = args[0];
    final String outputDir = args[1];
    final int numOfReducers = args.length > 2 ? Integer.parseInt(args[2]) : 1;

    final boolean textRequested =
        args.length > 3 && args[3].compareToIgnoreCase("textinputformat") == 0;
    Class<? extends InputFormat> theInputFormat;
    if (textRequested) {
      theInputFormat = TextInputFormat.class;
    } else {
      theInputFormat = SequenceFileInputFormat.class;
    }

    final Path specFile = args.length > 4 ? new Path(args[4]) : null;
    final String jobName = args.length > 5 ? args[5] : "";

    JobConf theJob = new JobConf(conf);
    if (specFile != null) {
      theJob.addResource(specFile);
    }

    // A jar named in the configuration takes precedence over the jar containing ValueAggregator.
    final String userJarFile = theJob.get("user.jar.file");
    if (userJarFile != null) {
      theJob.setJar(userJarFile);
    } else {
      theJob.setJarByClass(ValueAggregator.class);
    }
    theJob.setJobName("ValueAggregatorJob: " + jobName);

    // input side
    FileInputFormat.addInputPaths(theJob, inputDir);
    theJob.setInputFormat(theInputFormat);

    // output side
    FileOutputFormat.setOutputPath(theJob, new Path(outputDir));
    theJob.setOutputFormat(TextOutputFormat.class);

    // map / combine / reduce stages and their key/value types
    theJob.setMapperClass(ValueAggregatorMapper.class);
    theJob.setCombinerClass(ValueAggregatorCombiner.class);
    theJob.setReducerClass(ValueAggregatorReducer.class);
    theJob.setMapOutputKeyClass(Text.class);
    theJob.setMapOutputValueClass(Text.class);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);

    // task counts
    theJob.setNumMapTasks(1);
    theJob.setNumReduceTasks(numOfReducers);
    return theJob;
  }