public HiveAuthFactory(HiveConf conf) throws TTransportException {
  this.conf = conf;
  saslMessageLimit = conf.getIntVar(ConfVars.HIVE_THRIFT_SASL_MESSAGE_LIMIT);
  String transTypeStr = conf.getVar(HiveConf.ConfVars.HIVE_SERVER2_TRANSPORT_MODE);
  String authTypeStr = conf.getVar(ConfVars.HIVE_SERVER2_AUTHENTICATION);
  transportType = TransTypes.valueOf(transTypeStr.toUpperCase());
  authType =
      authTypeStr == null
          ? transportType.getDefaultAuthType()
          : AuthTypes.valueOf(authTypeStr.toUpperCase());
  // Null-safe comparison: authTypeStr may be null when no authentication type is configured.
  if (transportType == TransTypes.BINARY
      && AuthTypes.KERBEROS.name().equalsIgnoreCase(authTypeStr)
      && ShimLoader.getHadoopShims().isSecureShimImpl()) {
    saslServer =
        ShimLoader.getHadoopThriftAuthBridge()
            .createServer(
                conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB),
                conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL));
    // start delegation token manager
    try {
      saslServer.startDelegationTokenSecretManager(conf, null, ServerMode.HIVESERVER2);
    } catch (Exception e) {
      throw new TTransportException("Failed to start token manager", e);
    }
  } else {
    saslServer = null;
  }
}
public static void baseSetup() throws Exception {
  MiniDFSShim dfs = ShimLoader.getHadoopShims().getMiniDfs(conf, 4, true, null);
  fs = dfs.getFileSystem();
  baseDfsDir = new Path(new Path(fs.getUri()), "/base");
  fs.mkdirs(baseDfsDir);
  warehouseDir = new Path(baseDfsDir, "warehouse");
  fs.mkdirs(warehouseDir);
  conf.setVar(ConfVars.METASTOREWAREHOUSE, warehouseDir.toString());

  // Assuming the tests are run either in C or D drive in Windows OS!
  dataFileDir =
      conf.get("test.data.files")
          .replace('\\', '/')
          .replace("c:", "")
          .replace("C:", "")
          .replace("D:", "")
          .replace("d:", "");
  dataFilePath = new Path(dataFileDir, "kv1.txt");

  // Set up scratch directory
  Path scratchDir = new Path(baseDfsDir, "scratchdir");
  conf.setVar(HiveConf.ConfVars.SCRATCHDIR, scratchDir.toString());

  // set hive conf vars
  conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
  conf.setBoolVar(HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS, true);
  conf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");

  int port = MetaStoreUtils.findFreePort();
  MetaStoreUtils.startMetaStore(port, ShimLoader.getHadoopThriftAuthBridge());
  SessionState.start(new CliSessionState(conf));
  driver = new Driver(conf);
  setupDataTable();
}
@Override
public RecordReader<NullWritable, OrcStruct> createRecordReader(
    InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
  FileSplit fileSplit = (FileSplit) inputSplit;
  Path path = fileSplit.getPath();
  Configuration conf = ShimLoader.getHadoopShims().getConfiguration(context);
  return new OrcRecordReader(
      OrcFile.createReader(path, OrcFile.readerOptions(conf)),
      conf,
      fileSplit.getStart(),
      fileSplit.getLength());
}
@Override
public List<InputSplit> getSplits(JobContext jobContext)
    throws IOException, InterruptedException {
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ORC_GET_SPLITS);
  Configuration conf = ShimLoader.getHadoopShims().getConfiguration(jobContext);
  List<InputSplit> result = new ArrayList<InputSplit>();
  // Generate the ORC splits once and wrap each one for the mapreduce (new) API.
  for (OrcSplit split : OrcInputFormat.generateSplitsInfo(conf)) {
    result.add(new OrcNewSplit(split));
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ORC_GET_SPLITS);
  return result;
}
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
  Object o0, o1;
  o0 = arguments[0].get();
  if (o0 == null) {
    return null;
  }
  o1 = arguments[1].get();
  if (o1 == null) {
    return null;
  }

  switch (compareType) {
    case COMPARE_TEXT:
      Text t0, t1;
      t0 = soi0.getPrimitiveWritableObject(o0);
      t1 = soi1.getPrimitiveWritableObject(o1);
      result.set(ShimLoader.getHadoopShims().compareText(t0, t1) > 0);
      break;
    case COMPARE_INT:
      result.set(ioi0.get(o0) > ioi1.get(o1));
      break;
    case COMPARE_LONG:
      result.set(loi0.get(o0) > loi1.get(o1));
      break;
    case COMPARE_BYTE:
      result.set(byoi0.get(o0) > byoi1.get(o1));
      break;
    case COMPARE_BOOL:
      boolean b0 = boi0.get(o0);
      boolean b1 = boi1.get(o1);
      result.set(b0 && !b1);
      break;
    case COMPARE_STRING:
      String s0, s1;
      s0 = soi0.getPrimitiveJavaObject(o0);
      s1 = soi1.getPrimitiveJavaObject(o1);
      result.set(s0.compareTo(s1) > 0);
      break;
    case SAME_TYPE:
      result.set(ObjectInspectorUtils.compare(o0, argumentOIs[0], o1, argumentOIs[1]) > 0);
      break;
    default:
      Object converted_o0 = converter0.convert(o0);
      if (converted_o0 == null) {
        return null;
      }
      Object converted_o1 = converter1.convert(o1);
      if (converted_o1 == null) {
        return null;
      }
      result.set(
          ObjectInspectorUtils.compare(converted_o0, compareOI, converted_o1, compareOI) > 0);
  }
  return result;
}
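// Usage sketch (illustrative, not part of the class above): exercising the ">"
// comparison through the public GenericUDF contract with two int constants.
// GenericUDFOPGreaterThan, DeferredJavaObject and javaIntObjectInspector are the
// stock Hive classes; the helper method name below is hypothetical.
private static boolean evaluateGreaterThanExample() throws HiveException {
  GenericUDF udf = new GenericUDFOPGreaterThan();
  ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
  udf.initialize(new ObjectInspector[] {intOI, intOI});
  Object writable =
      udf.evaluate(
          new GenericUDF.DeferredObject[] {
            new GenericUDF.DeferredJavaObject(3), new GenericUDF.DeferredJavaObject(2)
          });
  return ((BooleanWritable) writable).get(); // true, since 3 > 2
}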
@Override
public void abortJob(JobContext context, int status) throws IOException {
  JobConf conf = ShimLoader.getHadoopShims().getJobConf(context);
  Path tmpLocation = new Path(conf.get(TMP_LOCATION));
  FileSystem fs = tmpLocation.getFileSystem(conf);
  LOG.debug("Removing " + tmpLocation.toString());
  fs.delete(tmpLocation, true);
}
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf jc) throws IOException {
  // delegate to the new api
  Job job = new Job(jc);
  JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
  checkOutputSpecs(jobContext);
}
private void handleSampling(DriverContext context, MapWork mWork, JobConf job, HiveConf conf)
    throws Exception {
  assert mWork.getAliasToWork().keySet().size() == 1;

  String alias = mWork.getAliases().get(0);
  Operator<?> topOp = mWork.getAliasToWork().get(alias);
  PartitionDesc partDesc = mWork.getAliasToPartnInfo().get(alias);

  ArrayList<String> paths = mWork.getPaths();
  ArrayList<PartitionDesc> parts = mWork.getPartitionDescs();

  List<Path> inputPaths = new ArrayList<Path>(paths.size());
  for (String path : paths) {
    inputPaths.add(new Path(path));
  }

  Path tmpPath = context.getCtx().getExternalTmpPath(inputPaths.get(0));
  Path partitionFile = new Path(tmpPath, ".partitions");
  ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);
  PartitionKeySampler sampler = new PartitionKeySampler();

  if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) {
    console.printInfo("Use sampling data created in previous MR");
    // merges sampling data from previous MR and makes partition keys for total sort
    for (Path path : inputPaths) {
      FileSystem fs = path.getFileSystem(job);
      for (FileStatus status : fs.globStatus(new Path(path, ".sampling*"))) {
        sampler.addSampleFile(status.getPath(), job);
      }
    }
  } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) {
    console.printInfo("Creating sampling data..");
    assert topOp instanceof TableScanOperator;
    TableScanOperator ts = (TableScanOperator) topOp;

    FetchWork fetchWork;
    if (!partDesc.isPartitioned()) {
      assert paths.size() == 1;
      fetchWork = new FetchWork(inputPaths.get(0), partDesc.getTableDesc());
    } else {
      fetchWork = new FetchWork(inputPaths, parts, partDesc.getTableDesc());
    }
    fetchWork.setSource(ts);

    // random sampling
    FetchOperator fetcher = PartitionKeySampler.createSampler(fetchWork, conf, job, ts);
    try {
      ts.initialize(conf, new ObjectInspector[] {fetcher.getOutputObjectInspector()});
      OperatorUtils.setChildrenCollector(ts.getChildOperators(), sampler);
      while (fetcher.pushRow()) {
      }
    } finally {
      fetcher.clearFetchContext();
    }
  } else {
    throw new IllegalArgumentException("Invalid sampling type " + mWork.getSamplingType());
  }
  sampler.writePartitionKeys(partitionFile, conf, job);
}
// Perform kerberos login using the hadoop shim API if the configuration is available
public static void loginFromKeytab(HiveConf hiveConf) throws IOException {
  String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL);
  String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB);
  if (principal.isEmpty() || keyTabFile.isEmpty()) {
    throw new IOException("HiveServer2 Kerberos principal or keytab is not correctly configured");
  } else {
    ShimLoader.getHadoopShims().loginUserFromKeytab(principal, keyTabFile);
  }
}
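// Usage sketch (an assumption about the calling side, not code from the source):
// a HiveServer2 startup path would typically perform the keytab login only when
// the underlying Hadoop cluster actually has security enabled. The wrapper method
// name is hypothetical.
public static void maybeLoginFromKeytab(HiveConf hiveConf) throws IOException {
  if (ShimLoader.getHadoopShims().isSecurityEnabled()) {
    // Throws IOException when the principal/keytab pair is missing or the login fails.
    loginFromKeytab(hiveConf);
  }
}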
// Perform SPNEGO login using the hadoop shim API if the configuration is available
public static UserGroupInformation loginFromSpnegoKeytabAndReturnUGI(HiveConf hiveConf)
    throws IOException {
  String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_PRINCIPAL);
  String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_KEYTAB);
  if (principal.isEmpty() || keyTabFile.isEmpty()) {
    throw new IOException("HiveServer2 SPNEGO principal or keytab is not correctly configured");
  } else {
    return ShimLoader.getHadoopShims().loginUserFromKeytabAndReturnUGI(principal, keyTabFile);
  }
}
private String getPrincipalWithoutRealmAndHost(String fullPrincipal)
    throws HttpAuthenticationException {
  KerberosNameShim fullKerberosName;
  try {
    fullKerberosName = ShimLoader.getHadoopShims().getKerberosNameShim(fullPrincipal);
    return fullKerberosName.getShortName();
  } catch (IOException e) {
    throw new HttpAuthenticationException(e);
  }
}
// Store the bucket path to bucket number mapping in the table scan operator.
// Although one mapper per file is used (BucketizedInputHiveInput), it is possible that
// any mapper can pick up any file (depending on the size of the files). The bucket number
// corresponding to the input file is stored to name the output bucket file appropriately.
private void storeBucketPathMapping(TableScanOperator tsOp, FileStatus[] srcs) {
  Map<String, Integer> bucketFileNameMapping = new HashMap<String, Integer>();
  for (int pos = 0; pos < srcs.length; pos++) {
    if (ShimLoader.getHadoopShims().isDirectory(srcs[pos])) {
      throw new RuntimeException(
          "Was expecting '" + srcs[pos].getPath() + "' to be bucket file.");
    }
    bucketFileNameMapping.put(srcs[pos].getPath().getName(), pos);
  }
  tsOp.getConf().setBucketFileNameMapping(bucketFileNameMapping);
}
/**
 * createTezDir creates a temporary directory in the scratchDir folder to be used with Tez.
 * Assumes scratchDir exists.
 */
public Path createTezDir(Path scratchDir, Configuration conf) throws IOException {
  UserGroupInformation ugi;
  String userName = System.getProperty("user.name");
  try {
    ugi = ShimLoader.getHadoopShims().getUGIForConf(conf);
    userName = ShimLoader.getHadoopShims().getShortUserName(ugi);
  } catch (LoginException e) {
    throw new IOException(e);
  }
  scratchDir = new Path(scratchDir, userName);
  Path tezDir = getTezDir(scratchDir);
  FileSystem fs = tezDir.getFileSystem(conf);
  LOG.debug("TezDir path set " + tezDir + " for user: " + userName);
  // since we are adding the user name to the scratch dir, we do not
  // need to give more permissions here
  fs.mkdirs(tezDir);
  return tezDir;
}
public static void verifyProxyAccess(
    String realUser, String proxyUser, String ipAddress, HiveConf hiveConf)
    throws HiveSQLException {
  try {
    UserGroupInformation sessionUgi;
    if (ShimLoader.getHadoopShims().isSecurityEnabled()) {
      KerberosNameShim kerbName = ShimLoader.getHadoopShims().getKerberosNameShim(realUser);
      String shortPrincipalName = kerbName.getServiceName();
      sessionUgi = ShimLoader.getHadoopShims().createProxyUser(shortPrincipalName);
    } else {
      sessionUgi = ShimLoader.getHadoopShims().createRemoteUser(realUser, null);
    }
    if (!proxyUser.equalsIgnoreCase(realUser)) {
      ShimLoader.getHadoopShims()
          .authorizeProxyAccess(proxyUser, sessionUgi, ipAddress, hiveConf);
    }
  } catch (IOException e) {
    throw new HiveSQLException(
        "Failed to validate proxy privilege of " + realUser + " for " + proxyUser, "08S01", e);
  }
}
@Override
public void commitJob(JobContext context) throws IOException {
  JobConf conf = ShimLoader.getHadoopShims().getJobConf(context);
  Path tmpLocation = new Path(conf.get(TMP_LOCATION));
  Path finalLocation = new Path(conf.get(FINAL_LOCATION));
  FileSystem fs = tmpLocation.getFileSystem(conf);
  LOG.debug("Moving contents of " + tmpLocation.toString() + " to " + finalLocation.toString());
  FileStatus[] contents = fs.listStatus(tmpLocation);
  for (int i = 0; i < contents.length; i++) {
    Path newPath = new Path(finalLocation, contents[i].getPath().getName());
    fs.rename(contents[i].getPath(), newPath);
  }
  fs.delete(tmpLocation, true);
}
/**
 * @param conf configuration used to resolve the current user and the target file system
 * @return path to destination directory on hdfs
 * @throws LoginException if we are unable to figure user information
 * @throws IOException when any dfs operation fails.
 */
public Path getDefaultDestDir(Configuration conf) throws LoginException, IOException {
  UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(conf);
  String userName = ShimLoader.getHadoopShims().getShortUserName(ugi);
  String userPathStr = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_USER_INSTALL_DIR);
  Path userPath = new Path(userPathStr);
  FileSystem fs = userPath.getFileSystem(conf);
  if (!(fs instanceof DistributedFileSystem)) {
    throw new IOException(ErrorMsg.INVALID_HDFS_URI.format(userPathStr));
  }

  String jarPathStr = userPathStr + "/" + userName;
  String hdfsDirPathStr = jarPathStr;
  Path hdfsDirPath = new Path(hdfsDirPathStr);

  FileStatus fstatus = fs.getFileStatus(hdfsDirPath);
  if (!fstatus.isDir()) {
    throw new IOException(ErrorMsg.INVALID_DIR.format(hdfsDirPath.toString()));
  }

  Path retPath = new Path(hdfsDirPath.toString() + "/.hiveJars");
  fs.mkdirs(retPath);
  return retPath;
}
public HiveSchemaTool(String hiveHome, HiveConf hiveConf, String dbType)
    throws HiveMetaException {
  if (hiveHome == null || hiveHome.isEmpty()) {
    throw new HiveMetaException("No Hive home directory provided");
  }
  this.hiveConf = hiveConf;
  this.dbType = dbType;
  this.metaStoreSchemaInfo = new MetaStoreSchemaInfo(hiveHome, hiveConf, dbType);
  userName = hiveConf.get(ConfVars.METASTORE_CONNECTION_USER_NAME.varname);
  try {
    passWord =
        ShimLoader.getHadoopShims().getPassword(hiveConf, HiveConf.ConfVars.METASTOREPWD.varname);
  } catch (IOException err) {
    throw new HiveMetaException("Error getting metastore password", err);
  }
}
private String getPrincipalWithoutRealm(String fullPrincipal)
    throws HttpAuthenticationException {
  KerberosNameShim fullKerberosName;
  try {
    fullKerberosName = ShimLoader.getHadoopShims().getKerberosNameShim(fullPrincipal);
  } catch (IOException e) {
    throw new HttpAuthenticationException(e);
  }
  String serviceName = fullKerberosName.getServiceName();
  String hostName = fullKerberosName.getHostName();
  String principalWithoutRealm = serviceName;
  if (hostName != null) {
    principalWithoutRealm = serviceName + "/" + hostName;
  }
  return principalWithoutRealm;
}
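// Illustrative expectation (an assumption for documentation, not asserted anywhere in
// the source): for a full principal such as "hive/node1.example.com@EXAMPLE.COM",
// getPrincipalWithoutRealm() would return "hive/node1.example.com", while
// getPrincipalWithoutRealmAndHost() above strips both the realm and the host and
// returns just "hive".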
public HadoopShims.HdfsEncryptionShim getHdfsEncryptionShim() throws HiveException {
  if (hdfsEncryptionShim == null) {
    try {
      FileSystem fs = FileSystem.get(conf);
      if ("hdfs".equals(fs.getUri().getScheme())) {
        hdfsEncryptionShim = ShimLoader.getHadoopShims().createHdfsEncryptionShim(fs, conf);
      } else {
        LOG.info("Could not get hdfsEncryptionShim, it is only applicable to hdfs filesystem.");
      }
    } catch (Exception e) {
      throw new HiveException(e);
    }
  }
  return hdfsEncryptionShim;
}
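// Usage sketch (an assumption about a typical caller, not code from the source;
// isPathEncrypted() is the HdfsEncryptionShim method this sketch presumes to be
// available, and the helper name is hypothetical):
private boolean isPathInEncryptionZone(Path path) throws HiveException, IOException {
  HadoopShims.HdfsEncryptionShim encryptionShim = getHdfsEncryptionShim();
  // The shim stays null on non-HDFS filesystems (see above), so guard before asking.
  return encryptionShim != null && encryptionShim.isPathEncrypted(path);
}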
/*
 * Creates the configuration object necessary to run a specific vertex from
 * map work. This includes input formats, input processor, etc.
 */
private JobConf initializeVertexConf(JobConf baseConf, MapWork mapWork) {
  JobConf conf = new JobConf(baseConf);

  if (mapWork.getNumMapTasks() != null) {
    conf.setInt(MRJobConfig.NUM_MAPS, mapWork.getNumMapTasks().intValue());
  }

  if (mapWork.getMaxSplitSize() != null) {
    HiveConf.setLongVar(
        conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mapWork.getMaxSplitSize().longValue());
  }

  if (mapWork.getMinSplitSize() != null) {
    HiveConf.setLongVar(
        conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mapWork.getMinSplitSize().longValue());
  }

  if (mapWork.getMinSplitSizePerNode() != null) {
    HiveConf.setLongVar(
        conf,
        HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE,
        mapWork.getMinSplitSizePerNode().longValue());
  }

  if (mapWork.getMinSplitSizePerRack() != null) {
    HiveConf.setLongVar(
        conf,
        HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK,
        mapWork.getMinSplitSizePerRack().longValue());
  }

  Utilities.setInputAttributes(conf, mapWork);

  String inpFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT);
  if (StringUtils.isBlank(inpFormat)) {
    inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName();
  }

  if (mapWork.isUseBucketizedHiveInputFormat()) {
    inpFormat = BucketizedHiveInputFormat.class.getName();
  }

  conf.set("mapred.mapper.class", ExecMapper.class.getName());
  conf.set("mapred.input.format.class", inpFormat);

  return conf;
}
@Override
protected void setUp() throws Exception {
  super.setUp();
  System.setProperty("hive.metastore.init.hooks", DummyMetaStoreInitListener.class.getName());
  int port = MetaStoreUtils.findFreePort();
  MetaStoreUtils.startMetaStore(port, ShimLoader.getHadoopThriftAuthBridge());
  hiveConf = new HiveConf(this.getClass());
  hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + port);
  hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
  hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
  hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
  hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
  SessionState.start(new CliSessionState(hiveConf));
  msc = new HiveMetaStoreClient(hiveConf);
  driver = new Driver(hiveConf);
}
@Override
public Iterator<HCatRecord> read() throws HCatException {
  HCatInputFormat inpFmt = new HCatInputFormat();
  RecordReader<WritableComparable, HCatRecord> rr;
  try {
    TaskAttemptContext cntxt =
        ShimLoader.getHadoopShims()
            .getHCatShim()
            .createTaskAttemptContext(conf, new TaskAttemptID());
    rr = inpFmt.createRecordReader(split, cntxt);
    rr.initialize(split, cntxt);
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
  return new HCatRecordItr(rr);
}
@Override
public ReaderContext prepareRead() throws HCatException {
  try {
    Job job = new Job(conf);
    HCatInputFormat hcif =
        HCatInputFormat.setInput(job, re.getDbName(), re.getTableName(), re.getFilterString());
    ReaderContextImpl cntxt = new ReaderContextImpl();
    cntxt.setInputSplits(
        hcif.getSplits(
            ShimLoader.getHadoopShims()
                .getHCatShim()
                .createJobContext(job.getConfiguration(), null)));
    cntxt.setConf(job.getConfiguration());
    return cntxt;
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
}
@BeforeClass
public static void before() throws Exception {
  int port = MetaStoreUtils.findFreePort();

  hiveConf = new HiveConf(TestMetaStoreMetrics.class);
  hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + port);
  hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
  hiveConf.setBoolVar(HiveConf.ConfVars.METASTORE_METRICS, true);
  hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);

  MetricsFactory.close();
  MetricsFactory.init(hiveConf);
  metrics = (CodahaleMetrics) MetricsFactory.getInstance();

  // Increments one HMS connection
  MetaStoreUtils.startMetaStore(port, ShimLoader.getHadoopThriftAuthBridge(), hiveConf);

  // Increments one HMS connection (Hive.get())
  SessionState.start(new CliSessionState(hiveConf));
  driver = new Driver(hiveConf);
}
/**
 * Given a Hive Configuration object - generate a command line fragment for passing such
 * configuration information to ExecDriver.
 */
public static String generateCmdLine(HiveConf hconf, Context ctx) throws IOException {
  HiveConf tempConf = new HiveConf();
  Path hConfFilePath = new Path(ctx.getLocalTmpPath(), JOBCONF_FILENAME);
  OutputStream out = null;

  Properties deltaP = hconf.getChangedProperties();
  boolean hadoopLocalMode = ShimLoader.getHadoopShims().isLocalMode(hconf);
  String hadoopSysDir = "mapred.system.dir";
  String hadoopWorkDir = "mapred.local.dir";

  for (Object one : deltaP.keySet()) {
    String oneProp = (String) one;

    if (hadoopLocalMode && (oneProp.equals(hadoopSysDir) || oneProp.equals(hadoopWorkDir))) {
      continue;
    }
    tempConf.set(oneProp, hconf.get(oneProp));
  }

  // Multiple concurrent local mode job submissions can cause collisions in
  // working dirs and system dirs
  // Workaround is to rename map red working dir to a temp dir in such cases
  if (hadoopLocalMode) {
    tempConf.set(hadoopSysDir, hconf.get(hadoopSysDir) + "/" + Utilities.randGen.nextInt());
    tempConf.set(hadoopWorkDir, hconf.get(hadoopWorkDir) + "/" + Utilities.randGen.nextInt());
  }

  try {
    out = FileSystem.getLocal(hconf).create(hConfFilePath);
    tempConf.writeXml(out);
  } finally {
    if (out != null) {
      out.close();
    }
  }
  return " -jobconffile " + hConfFilePath.toString();
}
@Override
public int execute(DriverContext driverContext) {

  Context ctx = driverContext.getCtx();
  boolean ctxCreated = false;

  try {
    if (ctx == null) {
      ctx = new Context(conf);
      ctxCreated = true;
    }

    // estimate number of reducers
    setNumberOfReducers();

    // auto-determine local mode if allowed
    if (!ctx.isLocalOnlyExecutionMode() && conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {

      if (inputSummary == null) {
        inputSummary = Utilities.getInputSummary(driverContext.getCtx(), work, null);
      }

      // set the values of totalInputFileSize and totalInputNumFiles, estimating them
      // if percentage block sampling is being used
      estimateInputSize();

      // at this point the number of reducers is precisely defined in the plan
      int numReducers = work.getNumReduceTasks();

      if (LOG.isDebugEnabled()) {
        LOG.debug(
            "Task: " + getId() + ", Summary: " + totalInputFileSize + "," + totalInputNumFiles
                + "," + numReducers);
      }

      String reason =
          MapRedTask.isEligibleForLocalMode(
              conf, numReducers, totalInputFileSize, totalInputNumFiles);
      if (reason == null) {
        // clone configuration before modifying it on per-task basis
        cloneConf();
        conf.setVar(HiveConf.ConfVars.HADOOPJT, "local");
        console.printInfo("Selecting local mode for task: " + getId());
        this.setLocalMode(true);
      } else {
        console.printInfo("Cannot run job locally: " + reason);
        this.setLocalMode(false);
      }
    }

    runningViaChild =
        "local".equals(conf.getVar(HiveConf.ConfVars.HADOOPJT))
            || conf.getBoolVar(HiveConf.ConfVars.SUBMITVIACHILD);

    if (!runningViaChild) {
      // we are not running this mapred task via child jvm
      // so directly invoke ExecDriver
      return super.execute(driverContext);
    }

    // we need to edit the configuration to setup cmdline. clone it first
    cloneConf();

    // propagate input format if necessary
    super.setInputAttributes(conf);

    // enable assertion
    String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN);
    String hiveJar = conf.getJar();

    String libJarsOption;
    String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
    conf.setVar(ConfVars.HIVEADDEDJARS, addedJars);
    String auxJars = conf.getAuxJars();

    // Put auxjars and addedjars together into libjars
    if (StringUtils.isEmpty(addedJars)) {
      if (StringUtils.isEmpty(auxJars)) {
        libJarsOption = " ";
      } else {
        libJarsOption = " -libjars " + auxJars + " ";
      }
    } else {
      if (StringUtils.isEmpty(auxJars)) {
        libJarsOption = " -libjars " + addedJars + " ";
      } else {
        libJarsOption = " -libjars " + addedJars + "," + auxJars + " ";
      }
    }

    // Generate the hiveConfArgs after potentially adding the jars
    String hiveConfArgs = generateCmdLine(conf);

    // write out the plan to a local file
    Path planPath = new Path(ctx.getLocalTmpFileURI(), "plan.xml");
    OutputStream out = FileSystem.getLocal(conf).create(planPath);
    MapredWork plan = getWork();
    LOG.info("Generating plan file " + planPath.toString());
    Utilities.serializeMapRedWork(plan, out);

    String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) ? "-nolog" : "";

    String jarCmd;
    if (ShimLoader.getHadoopShims().usesJobShell()) {
      jarCmd = libJarsOption + hiveJar + " " + ExecDriver.class.getName();
    } else {
      jarCmd = hiveJar + " " + ExecDriver.class.getName() + libJarsOption;
    }

    String cmdLine =
        hadoopExec + " jar " + jarCmd + " -plan " + planPath.toString() + " " + isSilent + " "
            + hiveConfArgs;

    String workDir = (new File(".")).getCanonicalPath();
    String files = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
    if (!files.isEmpty()) {
      cmdLine = cmdLine + " -files " + files;

      workDir = (new Path(ctx.getLocalTmpFileURI())).toUri().getPath();

      if (!(new File(workDir)).mkdir()) {
        throw new IOException("Cannot create tmp working dir: " + workDir);
      }

      for (String f : StringUtils.split(files, ',')) {
        Path p = new Path(f);
        String target = p.toUri().getPath();
        String link = workDir + Path.SEPARATOR + p.getName();
        if (FileUtil.symLink(target, link) != 0) {
          throw new IOException("Cannot link to added file: " + target + " from: " + link);
        }
      }
    }

    LOG.info("Executing: " + cmdLine);
    Process executor = null;

    // Inherit Java system variables
    String hadoopOpts;
    StringBuilder sb = new StringBuilder();
    Properties p = System.getProperties();
    for (String element : HIVE_SYS_PROP) {
      if (p.containsKey(element)) {
        sb.append(" -D" + element + "=" + p.getProperty(element));
      }
    }
    hadoopOpts = sb.toString();

    // Inherit the environment variables
    String[] env;
    Map<String, String> variables = new HashMap<String, String>(System.getenv());
    // The user can specify the hadoop memory
    if ("local".equals(conf.getVar(HiveConf.ConfVars.HADOOPJT))) {
      // if we are running in local mode - then the amount of memory used
      // by the child jvm can no longer default to the memory used by the
      // parent jvm
      int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM);
      if (hadoopMem == 0) {
        // remove env var that would default child jvm to use parent's memory
        // as default. child jvm would use default memory for a hadoop client
        variables.remove(HADOOP_MEM_KEY);
      } else {
        // user specified the memory for local mode hadoop run
        variables.put(HADOOP_MEM_KEY, String.valueOf(hadoopMem));
      }
    } else {
      // nothing to do - we are not running in local mode - only submitting
      // the job via a child process. In this case it's appropriate that the
      // child jvm use the same memory as the parent jvm
    }

    if (variables.containsKey(HADOOP_OPTS_KEY)) {
      variables.put(HADOOP_OPTS_KEY, variables.get(HADOOP_OPTS_KEY) + hadoopOpts);
    } else {
      variables.put(HADOOP_OPTS_KEY, hadoopOpts);
    }

    if (variables.containsKey(HIVE_DEBUG_RECURSIVE)) {
      configureDebugVariablesForChildJVM(variables);
    }

    env = new String[variables.size()];
    int pos = 0;
    for (Map.Entry<String, String> entry : variables.entrySet()) {
      String name = entry.getKey();
      String value = entry.getValue();
      env[pos++] = name + "=" + value;
    }

    // Run ExecDriver in another JVM
    executor = Runtime.getRuntime().exec(cmdLine, env, new File(workDir));

    StreamPrinter outPrinter =
        new StreamPrinter(
            executor.getInputStream(), null, SessionState.getConsole().getChildOutStream());
    StreamPrinter errPrinter =
        new StreamPrinter(
            executor.getErrorStream(), null, SessionState.getConsole().getChildErrStream());

    outPrinter.start();
    errPrinter.start();

    int exitVal = jobExecHelper.progressLocal(executor, getId());

    if (exitVal != 0) {
      LOG.error("Execution failed with exit status: " + exitVal);
    } else {
      LOG.info("Execution completed successfully");
    }

    return exitVal;
  } catch (Exception e) {
    e.printStackTrace();
    LOG.error("Exception: " + e.getMessage());
    return (1);
  } finally {
    try {
      // creating the context can create a bunch of files. So make
      // sure to clear it out
      if (ctxCreated) {
        ctx.clear();
      }
    } catch (Exception e) {
      LOG.error("Exception: " + e.getMessage());
    }
  }
}
public HiveTestUtil(
    String outDir, String logDir, MiniClusterType clusterType, String confDir, String hadoopVer)
    throws Exception {
  this.outDir = outDir;
  this.logDir = logDir;
  if (confDir != null && !confDir.isEmpty()) {
    HiveConf.setHiveSiteLocation(
        new URL("file://" + new File(confDir).toURI().getPath() + "/hive-site.xml"));
    LOG.info("Setting hive-site: " + HiveConf.getHiveSiteLocation());
  }
  conf = new HiveConf();
  String tmpBaseDir = System.getProperty("test.tmp.dir");
  // use equals-style emptiness check rather than reference comparison against ""
  if (tmpBaseDir == null || tmpBaseDir.isEmpty()) {
    tmpBaseDir = System.getProperty("java.io.tmpdir");
  }
  String metaStoreURL =
      "jdbc:derby:" + tmpBaseDir + File.separator + "metastore_dbtest;" + "create=true";
  conf.set(ConfVars.METASTORECONNECTURLKEY.varname, metaStoreURL);
  System.setProperty(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, metaStoreURL);

  // set where derby logs
  File derbyLogFile = new File(tmpBaseDir + "/derby.log");
  derbyLogFile.createNewFile();
  System.setProperty("derby.stream.error.file", derbyLogFile.getPath());

  this.hadoopVer = getHadoopMainVersion(hadoopVer);
  qMap = new TreeMap<String, String>();
  qSkipSet = new HashSet<String>();
  qSortSet = new HashSet<String>();
  qSortQuerySet = new HashSet<String>();
  qHashQuerySet = new HashSet<String>();
  qSortNHashQuerySet = new HashSet<String>();
  qJavaVersionSpecificOutput = new HashSet<String>();
  this.clusterType = clusterType;

  // Using randomUUID for dfs cluster
  System.setProperty("test.build.data", "target/test-data/hive-" + UUID.randomUUID().toString());

  HadoopShims shims = ShimLoader.getHadoopShims();
  int numberOfDataNodes = 4;

  if (clusterType != MiniClusterType.none) {
    dfs = shims.getMiniDfs(conf, numberOfDataNodes, true, null);
    FileSystem fs = dfs.getFileSystem();
    String uriString = WindowsPathUtil.getHdfsUriString(fs.getUri().toString());
    if (clusterType == MiniClusterType.tez) {
      mr = shims.getMiniTezCluster(conf, 4, uriString, 1);
    } else {
      mr = shims.getMiniMrCluster(conf, 4, uriString, 1);
    }
  }

  initConf();

  // Use the current directory if it is not specified
  String dataDir = conf.get("test.data.files");
  if (dataDir == null) {
    dataDir = new File(".").getAbsolutePath() + "/data/files";
  }
  testFiles = dataDir;

  // Use the current directory if it is not specified
  String scriptsDir = conf.get("test.data.scripts");
  if (scriptsDir == null) {
    scriptsDir = new File(".").getAbsolutePath() + "/data/scripts";
  }
  if (!initScript.isEmpty()) {
    this.initScript = scriptsDir + "/" + initScript;
  }
  if (!cleanupScript.isEmpty()) {
    this.cleanupScript = scriptsDir + "/" + cleanupScript;
  }

  overWrite = "true".equalsIgnoreCase(System.getProperty("test.output.overwrite"));

  setup = new HiveTestSetup();
  setup.preTest(conf);
  init();
}
private void getTaskInfos() throws IOException, MalformedURLException {
  int startIndex = 0;
  while (true) {
    TaskCompletionEvent[] taskCompletions = rj.getTaskCompletionEvents(startIndex);

    if (taskCompletions == null || taskCompletions.length == 0) {
      break;
    }

    boolean more = true;
    boolean firstError = true;
    for (TaskCompletionEvent t : taskCompletions) {
      // For each task completion event, get the associated task id, job id
      // and the logs
      String taskId = t.getTaskAttemptId().getTaskID().toString();
      String jobId = t.getTaskAttemptId().getJobID().toString();
      if (firstError) {
        console.printError("Examining task ID: " + taskId + " (and more) from job " + jobId);
        firstError = false;
      }

      TaskInfo ti = taskIdToInfo.get(taskId);
      if (ti == null) {
        ti = new TaskInfo(jobId);
        taskIdToInfo.put(taskId, ti);
      }
      // These tasks should have come from the same job.
      assert (ti.getJobId() != null && ti.getJobId().equals(jobId));

      String taskAttemptLogUrl =
          ShimLoader.getHadoopShims()
              .getTaskAttemptLogUrl(conf, t.getTaskTrackerHttp(), t.getTaskId());
      if (taskAttemptLogUrl != null) {
        ti.getLogUrls().add(taskAttemptLogUrl);
      }

      // If a task failed, fetch its error code (if available).
      // Also keep track of the total number of failures for that
      // task (typically, a task gets re-run up to 4 times if it fails).
      if (t.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED) {
        String[] diags = rj.getTaskDiagnostics(t.getTaskAttemptId());
        ti.setDiagnosticMesgs(diags);
        if (ti.getErrorCode() == 0) {
          ti.setErrorCode(extractErrorCode(diags));
        }

        Integer failAttempts = failures.get(taskId);
        if (failAttempts == null) {
          failAttempts = Integer.valueOf(0);
        }
        failAttempts = Integer.valueOf(failAttempts.intValue() + 1);
        failures.put(taskId, failAttempts);
      } else {
        successes.add(taskId);
      }
    }
    if (!more) {
      break;
    }
    startIndex += taskCompletions.length;
  }
}
/** Execute a query plan using Hadoop. */
@SuppressWarnings({"deprecation", "unchecked"})
@Override
public int execute(DriverContext driverContext) {

  IOPrepareCache ioPrepareCache = IOPrepareCache.get();
  ioPrepareCache.clear();

  boolean success = true;

  Context ctx = driverContext.getCtx();
  boolean ctxCreated = false;
  Path emptyScratchDir;

  MapWork mWork = work.getMapWork();
  ReduceWork rWork = work.getReduceWork();

  try {
    if (ctx == null) {
      ctx = new Context(job);
      ctxCreated = true;
    }

    emptyScratchDir = ctx.getMRTmpPath();
    FileSystem fs = emptyScratchDir.getFileSystem(job);
    fs.mkdirs(emptyScratchDir);
  } catch (IOException e) {
    e.printStackTrace();
    console.printError(
        "Error launching map-reduce job",
        "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    return 5;
  }

  HiveFileFormatUtils.prepareJobOutput(job);
  // See the javadoc on HiveOutputFormatImpl and HadoopShims.prepareJobOutput()
  job.setOutputFormat(HiveOutputFormatImpl.class);

  job.setMapperClass(ExecMapper.class);

  job.setMapOutputKeyClass(HiveKey.class);
  job.setMapOutputValueClass(BytesWritable.class);

  try {
    String partitioner = HiveConf.getVar(job, ConfVars.HIVEPARTITIONER);
    job.setPartitionerClass(JavaUtils.loadClass(partitioner));
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e.getMessage(), e);
  }

  if (mWork.getNumMapTasks() != null) {
    job.setNumMapTasks(mWork.getNumMapTasks().intValue());
  }

  if (mWork.getMaxSplitSize() != null) {
    HiveConf.setLongVar(
        job, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mWork.getMaxSplitSize().longValue());
  }

  if (mWork.getMinSplitSize() != null) {
    HiveConf.setLongVar(
        job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mWork.getMinSplitSize().longValue());
  }

  if (mWork.getMinSplitSizePerNode() != null) {
    HiveConf.setLongVar(
        job,
        HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE,
        mWork.getMinSplitSizePerNode().longValue());
  }

  if (mWork.getMinSplitSizePerRack() != null) {
    HiveConf.setLongVar(
        job,
        HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK,
        mWork.getMinSplitSizePerRack().longValue());
  }

  job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0);
  job.setReducerClass(ExecReducer.class);

  // set input format information if necessary
  setInputAttributes(job);

  // Turn on speculative execution for reducers
  boolean useSpeculativeExecReducers =
      HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
  HiveConf.setBoolVar(
      job, HiveConf.ConfVars.HADOOPSPECULATIVEEXECREDUCERS, useSpeculativeExecReducers);

  String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);

  if (mWork.isUseBucketizedHiveInputFormat()) {
    inpFormat = BucketizedHiveInputFormat.class.getName();
  }

  LOG.info("Using " + inpFormat);

  try {
    job.setInputFormat(JavaUtils.loadClass(inpFormat));
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e.getMessage(), e);
  }

  // No-Op - we don't really write anything here ..
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  // Transfer HIVEAUXJARS and HIVEADDEDJARS to "tmpjars" so hadoop understands it
  String auxJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEAUXJARS);
  String addedJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDJARS);
  if (StringUtils.isNotBlank(auxJars) || StringUtils.isNotBlank(addedJars)) {
    String allJars =
        StringUtils.isNotBlank(auxJars)
            ? (StringUtils.isNotBlank(addedJars) ? addedJars + "," + auxJars : auxJars)
            : addedJars;
    LOG.info("adding libjars: " + allJars);
    initializeFiles("tmpjars", allJars);
  }

  // Transfer HIVEADDEDFILES to "tmpfiles" so hadoop understands it
  String addedFiles = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDFILES);
  if (StringUtils.isNotBlank(addedFiles)) {
    initializeFiles("tmpfiles", addedFiles);
  }

  int returnVal = 0;
  boolean noName = StringUtils.isEmpty(HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJOBNAME));

  if (noName) {
    // This is for a special case to ensure unit tests pass
    HiveConf.setVar(job, HiveConf.ConfVars.HADOOPJOBNAME, "JOB" + Utilities.randGen.nextInt());
  }

  String addedArchives = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDARCHIVES);
  // Transfer HIVEADDEDARCHIVES to "tmparchives" so hadoop understands it
  if (StringUtils.isNotBlank(addedArchives)) {
    initializeFiles("tmparchives", addedArchives);
  }

  try {
    MapredLocalWork localwork = mWork.getMapRedLocalWork();
    if (localwork != null && localwork.hasStagedAlias()) {
      if (!ShimLoader.getHadoopShims().isLocalMode(job)) {
        Path localPath = localwork.getTmpPath();
        Path hdfsPath = mWork.getTmpHDFSPath();

        FileSystem hdfs = hdfsPath.getFileSystem(job);
        FileSystem localFS = localPath.getFileSystem(job);
        FileStatus[] hashtableFiles = localFS.listStatus(localPath);
        int fileNumber = hashtableFiles.length;
        String[] fileNames = new String[fileNumber];

        for (int i = 0; i < fileNumber; i++) {
          fileNames[i] = hashtableFiles[i].getPath().getName();
        }

        // package and compress all the hashtable files to an archive file
        String stageId = this.getId();
        String archiveFileName = Utilities.generateTarFileName(stageId);
        localwork.setStageID(stageId);

        CompressionUtils.tar(localPath.toUri().getPath(), fileNames, archiveFileName);
        Path archivePath = Utilities.generateTarPath(localPath, stageId);
        LOG.info("Archive " + hashtableFiles.length + " hash table files to " + archivePath);

        // upload archive file to hdfs
        Path hdfsFilePath = Utilities.generateTarPath(hdfsPath, stageId);
        short replication = (short) job.getInt("mapred.submit.replication", 10);
        hdfs.copyFromLocalFile(archivePath, hdfsFilePath);
        hdfs.setReplication(hdfsFilePath, replication);
        LOG.info("Upload 1 archive file from" + archivePath + " to: " + hdfsFilePath);

        // add the archive file to distributed cache
        DistributedCache.createSymlink(job);
        DistributedCache.addCacheArchive(hdfsFilePath.toUri(), job);
        LOG.info(
            "Add 1 archive file to distributed cache. Archive file: " + hdfsFilePath.toUri());
      }
    }
    work.configureJobConf(job);
    List<Path> inputPaths = Utilities.getInputPaths(job, mWork, emptyScratchDir, ctx, false);
    Utilities.setInputPaths(job, inputPaths);

    Utilities.setMapRedWork(job, work, ctx.getMRTmpPath());

    if (mWork.getSamplingType() > 0 && rWork != null && job.getNumReduceTasks() > 1) {
      try {
        handleSampling(ctx, mWork, job);
        job.setPartitionerClass(HiveTotalOrderPartitioner.class);
      } catch (IllegalStateException e) {
        console.printInfo("Not enough sampling data.. Rolling back to single reducer task");
        rWork.setNumReduceTasks(1);
        job.setNumReduceTasks(1);
      } catch (Exception e) {
        LOG.error("Sampling error", e);
        console.printError(
            e.toString(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        rWork.setNumReduceTasks(1);
        job.setNumReduceTasks(1);
      }
    }

    // remove the pwd from conf file so that job tracker doesn't show this logs
    String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
    if (pwd != null) {
      HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
    }
    JobClient jc = new JobClient(job);

    // make this client wait if job tracker is not behaving well.
    Throttle.checkJobTracker(job, LOG);

    if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) {
      // initialize stats publishing table
      StatsPublisher statsPublisher;
      StatsFactory factory = StatsFactory.newFactory(job);
      if (factory != null) {
        statsPublisher = factory.getStatsPublisher();
        List<String> statsTmpDir = Utilities.getStatsTmpDirs(mWork, job);
        if (rWork != null) {
          statsTmpDir.addAll(Utilities.getStatsTmpDirs(rWork, job));
        }
        StatsCollectionContext sc = new StatsCollectionContext(job);
        sc.setStatsTmpDirs(statsTmpDir);
        if (!statsPublisher.init(sc)) { // creating stats table if not exists
          if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
            throw new HiveException(
                ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
          }
        }
      }
    }

    Utilities.createTmpDirs(job, mWork);
    Utilities.createTmpDirs(job, rWork);

    SessionState ss = SessionState.get();
    if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
        && ss != null) {
      TezSessionState session = ss.getTezSession();
      TezSessionPoolManager.getInstance().close(session, true);
    }

    // Finally SUBMIT the JOB!
    rj = jc.submitJob(job);
    // replace it back
    if (pwd != null) {
      HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, pwd);
    }

    returnVal = jobExecHelper.progress(rj, jc, ctx.getHiveTxnManager());
    success = (returnVal == 0);
  } catch (Exception e) {
    e.printStackTrace();
    String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
    if (rj != null) {
      mesg = "Ended Job = " + rj.getJobID() + mesg;
    } else {
      mesg = "Job Submission failed" + mesg;
    }

    // Has to use full name to make sure it does not conflict with
    // org.apache.commons.lang.StringUtils
    console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));

    success = false;
    returnVal = 1;
  } finally {
    Utilities.clearWork(job);
    try {
      if (ctxCreated) {
        ctx.clear();
      }

      if (rj != null) {
        if (returnVal != 0) {
          rj.killJob();
        }
        jobID = rj.getID().toString();
      }
    } catch (Exception e) {
      LOG.warn("Failed while cleaning up ", e);
    } finally {
      HadoopJobExecHelper.runningJobs.remove(rj);
    }
  }

  // get the list of Dynamic partition paths
  try {
    if (rj != null) {
      if (mWork.getAliasToWork() != null) {
        for (Operator<? extends OperatorDesc> op : mWork.getAliasToWork().values()) {
          op.jobClose(job, success);
        }
      }
      if (rWork != null) {
        rWork.getReducer().jobClose(job, success);
      }
    }
  } catch (Exception e) {
    // jobClose needs to execute successfully otherwise fail task
    if (success) {
      success = false;
      returnVal = 3;
      String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'";
      console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    }
  }

  return (returnVal);
}
public void restoreOriginalTracker() {
  if (originalTracker != null) {
    ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, originalTracker);
    originalTracker = null;
  }
}
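// Usage sketch (an assumption about the surrounding class, which is not shown here:
// it is presumed to keep the "originalTracker" field used above, and the shim accessor
// getJobLauncherRpcAddress() is assumed to be the counterpart of setJobLauncherRpcAddress();
// the method name below is hypothetical):
public void switchToLocalTracker() {
  // Remember the current job launcher address, then point the configuration at the
  // in-process "local" launcher; restoreOriginalTracker() above undoes this switch.
  originalTracker = ShimLoader.getHadoopShims().getJobLauncherRpcAddress(conf);
  ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, "local");
}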