Example #1 (ExecDriver.main): parses command-line options, loads the serialized query plan from a local file, and executes it either in-process as a local task or as a MapReduce job.
  @SuppressWarnings("unchecked")
  public static void main(String[] args) throws IOException, HiveException {

    String planFileName = null;
    String jobConfFileName = null;
    boolean noLog = false;
    String files = null;
    boolean localtask = false;
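    // Parse command-line options; each option that takes a value consumes the following argument.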
    try {
      for (int i = 0; i < args.length; i++) {
        if (args[i].equals("-plan")) {
          planFileName = args[++i];
        } else if (args[i].equals("-jobconffile")) {
          jobConfFileName = args[++i];
        } else if (args[i].equals("-nolog")) {
          noLog = true;
        } else if (args[i].equals("-files")) {
          files = args[++i];
        } else if (args[i].equals("-localtask")) {
          localtask = true;
        }
      }
    } catch (IndexOutOfBoundsException e) {
      System.err.println("Missing argument to option");
      printUsage();
    }

    JobConf conf;
    if (localtask) {
      conf = new JobConf(MapredLocalTask.class);
    } else {
      conf = new JobConf(ExecDriver.class);
    }

    if (jobConfFileName != null) {
      conf.addResource(new Path(jobConfFileName));
    }

    if (files != null) {
      conf.set("tmpfiles", files);
    }

    // When Kerberos security is enabled, point the job at the delegation-token file
    // that the parent process exposes via HADOOP_TOKEN_FILE_LOCATION.
    if (UserGroupInformation.isSecurityEnabled()) {
      String hadoopAuthToken = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
      if (hadoopAuthToken != null) {
        conf.set("mapreduce.job.credentials.binary", hadoopAuthToken);
      }
    }

    boolean isSilent = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESESSIONSILENT);

    // Make sure a query id is always set, generating a fallback value when the conf does not provide one.
    String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID, "").trim();
    if (queryId.isEmpty()) {
      queryId = "unknown-" + System.currentTimeMillis();
    }
    System.setProperty(HiveConf.ConfVars.HIVEQUERYID.toString(), queryId);

    if (noLog) {
      // If started from main(), and noLog is on, we should not output
      // any logs. To turn the log on, please set -Dtest.silent=false
      org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getRootLogger();
      NullAppender appender = NullAppender.createNullAppender();
      appender.addToLogger(logger.getName(), Level.ERROR);
      appender.start();
    } else {
      setupChildLog4j(conf);
    }

    Logger LOG = LoggerFactory.getLogger(ExecDriver.class.getName());
    LogHelper console = new LogHelper(LOG, isSilent);

    if (planFileName == null) {
      console.printError("Must specify Plan File Name");
      printUsage();
    }

    // print out the location of the log file for the user so
    // that it's easy to find the reason for local mode execution failures
    for (Appender appender :
        ((org.apache.logging.log4j.core.Logger) LogManager.getRootLogger())
            .getAppenders()
            .values()) {
      if (appender instanceof FileAppender) {
        console.printInfo("Execution log at: " + ((FileAppender) appender).getFileName());
      } else if (appender instanceof RollingFileAppender) {
        console.printInfo("Execution log at: " + ((RollingFileAppender) appender).getFileName());
      }
    }

    // the plan file should always be in the local directory
    Path p = new Path(planFileName);
    FileSystem fs = FileSystem.getLocal(conf);
    InputStream pathData = fs.open(p);

    // This is a workaround for hadoop-17: libjars are not added to the classpath of the
    // child process, so we add them here explicitly.

    String auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS);
    String addedJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEADDEDJARS);
    try {
      // see also - code in CliDriver.java
      ClassLoader loader = conf.getClassLoader();
      if (StringUtils.isNotBlank(auxJars)) {
        loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","));
      }
      if (StringUtils.isNotBlank(addedJars)) {
        loader = Utilities.addToClassPath(loader, StringUtils.split(addedJars, ","));
      }
      conf.setClassLoader(loader);
      // Also set this as the thread context class loader so that new threads inherit it
      // and it propagates into Configurations created by those threads.
      Thread.currentThread().setContextClassLoader(loader);
    } catch (Exception e) {
      throw new HiveException(e.getMessage(), e);
    }
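    // Deserialize the plan from the local file and execute it, either in-process
    // (MapredLocalTask) or as a MapReduce job (ExecDriver).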
    int ret;
    if (localtask) {
      memoryMXBean = ManagementFactory.getMemoryMXBean();
      MapredLocalWork plan = Utilities.deserializePlan(pathData, MapredLocalWork.class, conf);
      MapredLocalTask ed = new MapredLocalTask(plan, conf, isSilent);
      ret = ed.executeInProcess(new DriverContext());

    } else {
      MapredWork plan = Utilities.deserializePlan(pathData, MapredWork.class, conf);
      ExecDriver ed = new ExecDriver(plan, conf, isSilent);
      ret = ed.execute(new DriverContext());
    }

    if (ret != 0) {
      System.exit(ret);
    }
  }
Example #2 (configure(JobConf)): mapper-side initialization that reads tuning parameters from the job configuration, builds the map operator tree from the serialized plan, and sets up fetch operators for map-side local work.
  public void configure(JobConf job) {
    memoryMXBean = ManagementFactory.getMemoryMXBean();
    l4j.info("maximum memory = " + memoryMXBean.getHeapMemoryUsage().getMax());

    try {
      l4j.info(
          "conf classpath = " + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs()));
      l4j.info(
          "thread classpath = "
              + Arrays.asList(
                  ((URLClassLoader) Thread.currentThread().getContextClassLoader()).getURLs()));
    } catch (Exception e) {
      l4j.info("cannot get classpath: " + e.getMessage());
    }
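    // Initialize the map-side execution state from the job configuration.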
    try {
      jc = job;
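      // Tuning parameters for the cardinality estimation (approximate distinct count) UDAF.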
      int estCountBucketSize = jc.getInt("hive.exec.estdistinct.bucketsize.log", 15);
      int estCountBufferSize = jc.getInt("hive.exec.estdistinct.buffsize.log", 8);
      GenericUDAFCardinalityEstimation.initParams(estCountBucketSize, estCountBufferSize);

      // Behavior flags for explode()/posexplode() when the input collection is empty or NULL.
      boolean isChangSizeZero2Null =
          jc.getBoolean(
              HiveConf.ConfVars.HIVE_UDTF_EXPLODE_CHANGE_ZERO_SIZE_2_NULL.varname,
              HiveConf.ConfVars.HIVE_UDTF_EXPLODE_CHANGE_ZERO_SIZE_2_NULL.defaultBoolVal);
      boolean isChangeNull2Null =
          jc.getBoolean(
              HiveConf.ConfVars.HIVE_UDTF_EXPLODE_CHANGE_NULL_2_NULL.varname,
              HiveConf.ConfVars.HIVE_UDTF_EXPLODE_CHANGE_NULL_2_NULL.defaultBoolVal);
      GenericUDTFExplode.isChangSizeZero2Null = isChangSizeZero2Null;
      GenericUDTFExplode.isChangNull2Null = isChangeNull2Null;
      GenericUDTFPosExplode.isChangSizeZero2Null = isChangSizeZero2Null;
      GenericUDTFPosExplode.isChangNull2Null = isChangeNull2Null;

      // Rebuild the map-side operator tree from the plan serialized into the job configuration.
      mapredWork mrwork = Utilities.getMapRedWork(job);
      mo = new MapOperator();
      mo.setConf(mrwork);
      mo.setChildren(job);
      l4j.info(mo.dump(0));
      mo.initialize(jc, null);

      // Map-side local work (e.g. map-join side tables) is optional; nothing more to set up without it.
      localWork = mrwork.getMapLocalWork();
      if (localWork == null) {
        return;
      }
      fetchOperators = new HashMap<String, FetchOperator>();
      for (Map.Entry<String, fetchWork> entry : localWork.getAliasToFetchWork().entrySet()) {
        fetchOperators.put(entry.getKey(), new FetchOperator(entry.getValue(), job));
        l4j.info("fetchoperator for " + entry.getKey() + " created");
      }
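      // Initialize each alias's operator tree with the object inspector produced by its fetch operator.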
      for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
        Operator<? extends Serializable> forwardOp = localWork.getAliasToWork().get(entry.getKey());
        forwardOp.initialize(
            jc, new ObjectInspector[] {entry.getValue().getOutputObjectInspector()});
        l4j.info("fetchoperator for " + entry.getKey() + " initialized");
      }
    } catch (Throwable e) {
      abort = true;
      if (e instanceof OutOfMemoryError) {
        throw (OutOfMemoryError) e;
      } else {
        throw new RuntimeException("Map operator initialization failed", e);
      }
    }
  }