// Load the Hadoop configuration files from the classpath.
public static JobConf getConfig() {
  JobConf conf = new JobConf(HDFSDao.class);
  conf.setJobName("HdfsDAO");
  conf.addResource("classpath:/hadoop/core-site.xml");
  conf.addResource("classpath:/hadoop/hdfs-site.xml");
  conf.addResource("classpath:/hadoop/mapred-site.xml");
  return conf;
}
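A minimal usage sketch, assuming the HDFSDao class above and the usual org.apache.hadoop imports; it shows the merged JobConf (a Configuration subclass) being handed to a FileSystem handle:

// Hypothetical caller: fs.defaultFS is picked up from the core-site.xml resource
// added in getConfig().
JobConf conf = HDFSDao.getConfig();
FileSystem fs = FileSystem.get(conf);
System.out.println("Working directory: " + fs.getWorkingDirectory());
fs.close();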
public JobBuilder(String jobName, Configuration conf) {
  _jobConf = new JobConf(conf);

  // legacy crap
  _jobConf.addResource("nutch-default.xml");
  _jobConf.addResource("nutch-site.xml");

  // defaults
  _jobConf.setNumTasksToExecutePerJvm(1000);
  _jobConf.setJobName(jobName);
}
@SuppressWarnings("unchecked") public static void main(String[] args) throws IOException, HiveException { String planFileName = null; String jobConfFileName = null; boolean noLog = false; String files = null; boolean localtask = false; try { for (int i = 0; i < args.length; i++) { if (args[i].equals("-plan")) { planFileName = args[++i]; } else if (args[i].equals("-jobconffile")) { jobConfFileName = args[++i]; } else if (args[i].equals("-nolog")) { noLog = true; } else if (args[i].equals("-files")) { files = args[++i]; } else if (args[i].equals("-localtask")) { localtask = true; } } } catch (IndexOutOfBoundsException e) { System.err.println("Missing argument to option"); printUsage(); } JobConf conf; if (localtask) { conf = new JobConf(MapredLocalTask.class); } else { conf = new JobConf(ExecDriver.class); } if (jobConfFileName != null) { conf.addResource(new Path(jobConfFileName)); } if (files != null) { conf.set("tmpfiles", files); } if (UserGroupInformation.isSecurityEnabled()) { String hadoopAuthToken = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION); if (hadoopAuthToken != null) { conf.set("mapreduce.job.credentials.binary", hadoopAuthToken); } } boolean isSilent = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESESSIONSILENT); String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID, "").trim(); if (queryId.isEmpty()) { queryId = "unknown-" + System.currentTimeMillis(); } System.setProperty(HiveConf.ConfVars.HIVEQUERYID.toString(), queryId); if (noLog) { // If started from main(), and noLog is on, we should not output // any logs. To turn the log on, please set -Dtest.silent=false org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getRootLogger(); NullAppender appender = NullAppender.createNullAppender(); appender.addToLogger(logger.getName(), Level.ERROR); appender.start(); } else { setupChildLog4j(conf); } Logger LOG = LoggerFactory.getLogger(ExecDriver.class.getName()); LogHelper console = new LogHelper(LOG, isSilent); if (planFileName == null) { console.printError("Must specify Plan File Name"); printUsage(); } // print out the location of the log file for the user so // that it's easy to find reason for local mode execution failures for (Appender appender : ((org.apache.logging.log4j.core.Logger) LogManager.getRootLogger()) .getAppenders() .values()) { if (appender instanceof FileAppender) { console.printInfo("Execution log at: " + ((FileAppender) appender).getFileName()); } else if (appender instanceof RollingFileAppender) { console.printInfo("Execution log at: " + ((RollingFileAppender) appender).getFileName()); } } // the plan file should always be in local directory Path p = new Path(planFileName); FileSystem fs = FileSystem.getLocal(conf); InputStream pathData = fs.open(p); // this is workaround for hadoop-17 - libjars are not added to classpath of the // child process. 
so we add it here explicitly String auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS); String addedJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEADDEDJARS); try { // see also - code in CliDriver.java ClassLoader loader = conf.getClassLoader(); if (StringUtils.isNotBlank(auxJars)) { loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ",")); } if (StringUtils.isNotBlank(addedJars)) { loader = Utilities.addToClassPath(loader, StringUtils.split(addedJars, ",")); } conf.setClassLoader(loader); // Also set this to the Thread ContextClassLoader, so new threads will // inherit // this class loader, and propagate into newly created Configurations by // those // new threads. Thread.currentThread().setContextClassLoader(loader); } catch (Exception e) { throw new HiveException(e.getMessage(), e); } int ret; if (localtask) { memoryMXBean = ManagementFactory.getMemoryMXBean(); MapredLocalWork plan = Utilities.deserializePlan(pathData, MapredLocalWork.class, conf); MapredLocalTask ed = new MapredLocalTask(plan, conf, isSilent); ret = ed.executeInProcess(new DriverContext()); } else { MapredWork plan = Utilities.deserializePlan(pathData, MapredWork.class, conf); ExecDriver ed = new ExecDriver(plan, conf, isSilent); ret = ed.execute(new DriverContext()); } if (ret != 0) { System.exit(ret); } }
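For reference, a hedged sketch of invoking this entry point in-process; the -plan and -jobconffile paths are hypothetical placeholders (Hive normally builds this command line itself when it launches a child JVM for local execution):

// Hypothetical invocation with placeholder local paths.
String[] execArgs = new String[] {
    "-plan", "/tmp/hive-local/plan.bin",       // hypothetical serialized plan file
    "-jobconffile", "/tmp/hive-local/job.xml", // hypothetical job configuration dump
    "-nolog"
};
ExecDriver.main(execArgs);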
/**
 * Create an Aggregate based map/reduce job.
 *
 * @param args the arguments used for job creation. Generic hadoop arguments are accepted.
 * @return a JobConf object ready for submission.
 * @throws IOException
 * @see GenericOptionsParser
 */
public static JobConf createValueAggregatorJob(String args[]) throws IOException {
  Configuration conf = new Configuration();
  GenericOptionsParser genericParser = new GenericOptionsParser(conf, args);
  args = genericParser.getRemainingArgs();
  if (args.length < 2) {
    System.out.println("usage: inputDirs outDir "
        + "[numOfReducer [textinputformat|seq [specfile [jobName]]]]");
    GenericOptionsParser.printGenericCommandUsage(System.out);
    System.exit(1);
  }
  String inputDir = args[0];
  String outputDir = args[1];
  int numOfReducers = 1;
  if (args.length > 2) {
    numOfReducers = Integer.parseInt(args[2]);
  }

  Class<? extends InputFormat> theInputFormat = TextInputFormat.class;
  if (args.length > 3 && args[3].compareToIgnoreCase("textinputformat") == 0) {
    theInputFormat = TextInputFormat.class;
  } else {
    theInputFormat = SequenceFileInputFormat.class;
  }

  Path specFile = null;
  if (args.length > 4) {
    specFile = new Path(args[4]);
  }

  String jobName = "";
  if (args.length > 5) {
    jobName = args[5];
  }

  JobConf theJob = new JobConf(conf);
  if (specFile != null) {
    theJob.addResource(specFile);
  }
  String userJarFile = theJob.get("user.jar.file");
  if (userJarFile == null) {
    theJob.setJarByClass(ValueAggregator.class);
  } else {
    theJob.setJar(userJarFile);
  }
  theJob.setJobName("ValueAggregatorJob: " + jobName);

  FileInputFormat.addInputPaths(theJob, inputDir);
  theJob.setInputFormat(theInputFormat);

  theJob.setMapperClass(ValueAggregatorMapper.class);
  FileOutputFormat.setOutputPath(theJob, new Path(outputDir));
  theJob.setOutputFormat(TextOutputFormat.class);
  theJob.setMapOutputKeyClass(Text.class);
  theJob.setMapOutputValueClass(Text.class);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(ValueAggregatorReducer.class);
  theJob.setCombinerClass(ValueAggregatorCombiner.class);
  theJob.setNumMapTasks(1);
  theJob.setNumReduceTasks(numOfReducers);
  return theJob;
}
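A short usage sketch, assuming the classic org.apache.hadoop.mapred API; a driver would typically pass its command-line arguments through and submit the returned JobConf with JobClient:

// Hypothetical driver method: build the aggregate job and run it synchronously.
public static void main(String[] args) throws IOException {
  JobConf job = createValueAggregatorJob(args);
  JobClient.runJob(job); // blocks until the job completes or fails
}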