Example #1
 public FosterStorageHandler(String ifName, String ofName, String serdeName)
     throws ClassNotFoundException {
   this(
       (Class<? extends InputFormat>) JavaUtils.loadClass(ifName),
       (Class<? extends OutputFormat>) JavaUtils.loadClass(ofName),
       (Class<? extends SerDe>) JavaUtils.loadClass(serdeName));
 }
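The three casts in this constructor are unchecked because JavaUtils.loadClass hands back an untyped Class. As a hedged alternative sketch (my own, not code from this project), the same lookups can be made type-checked with the asSubclass pattern that Example #5 below uses:

 // Sketch: resolve the classes with asSubclass instead of unchecked casts.
 Class<? extends InputFormat> inFormat =
     Class.forName(ifName, true, JavaUtils.getClassLoader()).asSubclass(InputFormat.class);
 Class<? extends OutputFormat> outFormat =
     Class.forName(ofName, true, JavaUtils.getClassLoader()).asSubclass(OutputFormat.class);
 Class<? extends SerDe> serde =
     Class.forName(serdeName, true, JavaUtils.getClassLoader()).asSubclass(SerDe.class);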
Example #2
   /**
    * Create an instance of a storage handler. If storageHandler == null, then a surrogate
    * StorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe. This
    * StorageHandler assumes the other supplied storage artifacts are for a file-based storage
    * system.
   *
   * @param conf job's configuration will be used to configure the Configurable StorageHandler
    * @param storageHandler fully qualified class name of the desired StorageHandler instance
    * @param serDe fully qualified class name of the desired SerDe instance
    * @param inputFormat fully qualified class name of the desired InputFormat instance
    * @param outputFormat fully qualified class name of the desired OutputFormat instance
   * @return storageHandler instance
   * @throws IOException
   */
  public static HCatStorageHandler getStorageHandler(
      Configuration conf,
      String storageHandler,
      String serDe,
      String inputFormat,
      String outputFormat)
      throws IOException {

    if ((storageHandler == null) || (storageHandler.equals(FosterStorageHandler.class.getName()))) {
      try {
        FosterStorageHandler fosterStorageHandler =
            new FosterStorageHandler(inputFormat, outputFormat, serDe);
        fosterStorageHandler.setConf(conf);
        return fosterStorageHandler;
      } catch (ClassNotFoundException e) {
        throw new IOException("Failed to load " + "foster storage handler", e);
      }
    }

    try {
      Class<? extends HCatStorageHandler> handlerClass =
          (Class<? extends HCatStorageHandler>)
              Class.forName(storageHandler, true, JavaUtils.getClassLoader());
      return (HCatStorageHandler) ReflectionUtils.newInstance(handlerClass, conf);
    } catch (ClassNotFoundException e) {
      throw new IOException("Error in loading storage handler." + e.getMessage(), e);
    }
  }
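A hedged usage sketch for getStorageHandler, written as if it sat next to the method above. Passing null as the storage handler exercises the FosterStorageHandler branch; the SerDe and format class names are the stock Hive/Hadoop text-table ones and are only assumptions about what a given table declares:

 static HCatStorageHandler handlerForTextTable(Configuration conf) throws IOException {
   // null storage handler -> FosterStorageHandler wraps the file-based artifacts
   return getStorageHandler(
       conf,
       null,
       "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
       "org.apache.hadoop.mapred.TextInputFormat",
       "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
 }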
Example #3
 public SessionState(HiveConf conf, String userName) {
   this.conf = conf;
   this.userName = userName;
   isSilent = conf.getBoolVar(HiveConf.ConfVars.HIVESESSIONSILENT);
   ls = new LineageState();
   // Must be deterministic order map for consistent q-test output across Java versions
   overriddenConfigurations = new LinkedHashMap<String, String>();
   overriddenConfigurations.putAll(HiveConf.getConfSystemProperties());
   // if there isn't already a session name, go ahead and create it.
   if (StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HIVESESSIONID))) {
     conf.setVar(HiveConf.ConfVars.HIVESESSIONID, makeSessionId());
   }
   parentLoader = JavaUtils.getClassLoader();
 }
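The LinkedHashMap choice is purely about iteration order, as the comment notes. A small illustration with made-up entries (not from the source):

 // Iteration follows insertion order, so dumping the overridden configs gives
 // identical q-test output on every Java version; a plain HashMap would not.
 Map<String, String> overridden = new LinkedHashMap<String, String>();
 overridden.put("hive.exec.scratchdir", "/tmp/hive"); // hypothetical entries
 overridden.put("hive.session.silent", "false");
 overridden.forEach((key, value) -> System.out.println(key + "=" + value));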
Example #4
 public void closeCUDFLoaders() {
   lock.lock();
   try {
     try {
       for (ClassLoader loader : mSessionUDFLoaders) {
         JavaUtils.closeClassLoader(loader);
       }
     } catch (IOException ie) {
       LOG.error("Error in close loader: " + ie);
     }
     mSessionUDFLoaders.clear();
   } finally {
     lock.unlock();
   }
 }
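For context, a hedged sketch of the registration side of this pattern, i.e. how a session-level UDF class loader might be tracked before closeCUDFLoaders() tears it down. The method name registerCUDFLoader is an assumption; only the fields used by the example above are relied on:

 public void registerCUDFLoader(ClassLoader loader) {
   lock.lock();
   try {
     // Track the loader so closeCUDFLoaders() can later release its jar handles
     // through JavaUtils.closeClassLoader(loader).
     mSessionUDFLoaders.add(loader);
   } finally {
     lock.unlock();
   }
 }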
Example #5
  @SuppressWarnings("deprecation")
  private static Class<? extends Deserializer> getDeserializerClass(String name) {
    // CDH uses different names for Parquet
    if ("parquet.hive.serde.ParquetHiveSerDe".equals(name)) {
      return ParquetHiveSerDe.class;
    }

    try {
      return Class.forName(name, true, JavaUtils.getClassLoader()).asSubclass(Deserializer.class);
    } catch (ClassNotFoundException e) {
      throw new PrestoException(HIVE_SERDE_NOT_FOUND, "deserializer does not exist: " + name);
    } catch (ClassCastException e) {
      throw new RuntimeException("invalid deserializer class: " + name);
    }
  }
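A hedged sketch of how the resolved class might be turned into a live Deserializer; the wrapper method and its error message are assumptions, not Presto's actual code:

 private static Deserializer createDeserializer(String name) {
   Class<? extends Deserializer> clazz = getDeserializerClass(name);
   try {
     // SerDe implementations are expected to expose a public no-arg constructor.
     return clazz.getConstructor().newInstance();
   } catch (ReflectiveOperationException e) {
     throw new RuntimeException("error creating deserializer: " + name, e);
   }
 }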
Example #6
   /**
    * Returns the hooks specified in a comma separated list of hook class names. The hooks are
    * returned in a list in the order they were specified.
    *
    * @param csHooks A comma separated list of the hook class names.
    * @return A list of the hooks, in the order they are listed in csHooks
   * @throws Exception
   */
  private static <T extends Hook> List<T> getHooks(String csHooks) throws Exception {

    List<T> hooks = new ArrayList<T>();
    if (csHooks.isEmpty()) {
      return hooks;
    }
    for (String hookClass : Splitter.on(",").omitEmptyStrings().trimResults().split(csHooks)) {
      try {
        @SuppressWarnings("unchecked")
        T hook = (T) Class.forName(hookClass, true, JavaUtils.getClassLoader()).newInstance();
        hooks.add(hook);
      } catch (ClassNotFoundException e) {
        LOG.error(hookClass + " class not found: " + e.getMessage());
        throw e;
      }
    }

    return hooks;
  }
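A hedged usage sketch for getHooks. The hook class names are placeholders; in Hive the comma separated string typically comes from a HiveConf variable such as hive.exec.post.hooks, and the element type is whichever hook interface the caller expects (ExecuteWithHookContext here):

 static List<ExecuteWithHookContext> loadPostHooks() throws Exception {
   // Hypothetical hook implementations; each is loaded through
   // JavaUtils.getClassLoader() and instantiated reflectively.
   return getHooks("com.example.AuditHook,com.example.LineageLogger");
 }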
Example #7
  public void close() throws IOException {
    if (txnMgr != null) txnMgr.closeTxnManager();
    JavaUtils.closeClassLoadersTo(conf.getClassLoader(), parentLoader);
    File resourceDir = new File(getConf().getVar(HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR));
    LOG.debug("Removing resource dir " + resourceDir);
    try {
      if (resourceDir.exists()) {
        FileUtils.deleteDirectory(resourceDir);
      }
    } catch (IOException e) {
      LOG.info("Error removing session resource dir " + resourceDir, e);
    } finally {
      detachSession();
    }

    try {
      if (tezSessionState != null) {
        TezSessionPoolManager.getInstance().close(tezSessionState, false);
      }
    } catch (Exception e) {
      LOG.info("Error closing tez session", e);
    } finally {
      tezSessionState = null;
    }

    if (sparkSession != null) {
      try {
        SparkSessionManagerImpl.getInstance().closeSession(sparkSession);
      } catch (Exception ex) {
        LOG.error("Error closing spark session.", ex);
      } finally {
        sparkSession = null;
      }
    }

    dropSessionPaths(conf);
  }
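Examples #3 and #7 are the two ends of one class-loader lifecycle: the session captures JavaUtils.getClassLoader() as parentLoader at construction time, added jars may stack child loaders on top of it via the conf, and close() unwinds everything above the saved parent. A condensed sketch of that lifecycle (assumed shape, not a literal excerpt):

 // At session start (Example #3): remember the current loader.
 ClassLoader parentLoader = JavaUtils.getClassLoader();
 // ... ADD JAR and similar commands may install child class loaders on conf ...
 // At session close (Example #7): close every loader stacked above the parent.
 JavaUtils.closeClassLoadersTo(conf.getClassLoader(), parentLoader);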
Example #8
 private static <T> T instantiate(Class<T> classType, String classname) throws IOException {
   T t = null;
   try {
     Class c = JavaUtils.loadClass(classname);
     Object o = c.newInstance();
     if (classType.isAssignableFrom(o.getClass())) {
       t = (T) o;
     } else {
       String s = classname + " is not an instance of " + classType.getName();
       LOG.error(s);
       throw new IOException(s);
     }
   } catch (ClassNotFoundException e) {
     LOG.error("Unable to instantiate class, " + StringUtils.stringifyException(e));
     throw new IOException(e);
   } catch (InstantiationException e) {
     LOG.error("Unable to instantiate class, " + StringUtils.stringifyException(e));
     throw new IOException(e);
   } catch (IllegalAccessException e) {
     LOG.error("Unable to instantiate class, " + StringUtils.stringifyException(e));
     throw new IOException(e);
   }
   return t;
 }
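A hedged usage sketch for instantiate; the class name is hypothetical and Runnable merely stands in for whatever plug-in interface the configuration actually points at:

 static Runnable loadStartupTask() throws IOException {
   // Fails with IOException if the class is missing, not instantiable, or does
   // not implement Runnable.
   return instantiate(Runnable.class, "com.example.StartupTask");
 }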
Example #9
  /** Execute a query plan using Hadoop. */
  @SuppressWarnings({"deprecation", "unchecked"})
  @Override
  public int execute(DriverContext driverContext) {

    IOPrepareCache ioPrepareCache = IOPrepareCache.get();
    ioPrepareCache.clear();

    boolean success = true;

    Context ctx = driverContext.getCtx();
    boolean ctxCreated = false;
    Path emptyScratchDir;

    MapWork mWork = work.getMapWork();
    ReduceWork rWork = work.getReduceWork();

    try {
      if (ctx == null) {
        ctx = new Context(job);
        ctxCreated = true;
      }

      emptyScratchDir = ctx.getMRTmpPath();
      FileSystem fs = emptyScratchDir.getFileSystem(job);
      fs.mkdirs(emptyScratchDir);
    } catch (IOException e) {
      e.printStackTrace();
      console.printError(
          "Error launching map-reduce job",
          "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return 5;
    }

    HiveFileFormatUtils.prepareJobOutput(job);
    // See the javadoc on HiveOutputFormatImpl and HadoopShims.prepareJobOutput()
    job.setOutputFormat(HiveOutputFormatImpl.class);

    job.setMapperClass(ExecMapper.class);

    job.setMapOutputKeyClass(HiveKey.class);
    job.setMapOutputValueClass(BytesWritable.class);

    try {
      String partitioner = HiveConf.getVar(job, ConfVars.HIVEPARTITIONER);
      job.setPartitionerClass(JavaUtils.loadClass(partitioner));
    } catch (ClassNotFoundException e) {
      throw new RuntimeException(e.getMessage(), e);
    }

    if (mWork.getNumMapTasks() != null) {
      job.setNumMapTasks(mWork.getNumMapTasks().intValue());
    }

    if (mWork.getMaxSplitSize() != null) {
      HiveConf.setLongVar(
          job, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mWork.getMaxSplitSize().longValue());
    }

    if (mWork.getMinSplitSize() != null) {
      HiveConf.setLongVar(
          job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mWork.getMinSplitSize().longValue());
    }

    if (mWork.getMinSplitSizePerNode() != null) {
      HiveConf.setLongVar(
          job,
          HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE,
          mWork.getMinSplitSizePerNode().longValue());
    }

    if (mWork.getMinSplitSizePerRack() != null) {
      HiveConf.setLongVar(
          job,
          HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK,
          mWork.getMinSplitSizePerRack().longValue());
    }

    job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0);
    job.setReducerClass(ExecReducer.class);

    // set input format information if necessary
    setInputAttributes(job);

    // Turn on speculative execution for reducers
    boolean useSpeculativeExecReducers =
        HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
    HiveConf.setBoolVar(
        job, HiveConf.ConfVars.HADOOPSPECULATIVEEXECREDUCERS, useSpeculativeExecReducers);

    String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);

    if (mWork.isUseBucketizedHiveInputFormat()) {
      inpFormat = BucketizedHiveInputFormat.class.getName();
    }

    LOG.info("Using " + inpFormat);

    try {
      job.setInputFormat(JavaUtils.loadClass(inpFormat));
    } catch (ClassNotFoundException e) {
      throw new RuntimeException(e.getMessage(), e);
    }

    // No-op - we don't really write anything here.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Transfer HIVEAUXJARS and HIVEADDEDJARS to "tmpjars" so hadoop understands
    // it
    String auxJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEAUXJARS);
    String addedJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDJARS);
    if (StringUtils.isNotBlank(auxJars) || StringUtils.isNotBlank(addedJars)) {
      String allJars =
          StringUtils.isNotBlank(auxJars)
              ? (StringUtils.isNotBlank(addedJars) ? addedJars + "," + auxJars : auxJars)
              : addedJars;
      LOG.info("adding libjars: " + allJars);
      initializeFiles("tmpjars", allJars);
    }

    // Transfer HIVEADDEDFILES to "tmpfiles" so hadoop understands it
    String addedFiles = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDFILES);
    if (StringUtils.isNotBlank(addedFiles)) {
      initializeFiles("tmpfiles", addedFiles);
    }
    int returnVal = 0;
    boolean noName = StringUtils.isEmpty(HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJOBNAME));

    if (noName) {
      // This is for a special case to ensure unit tests pass
      HiveConf.setVar(job, HiveConf.ConfVars.HADOOPJOBNAME, "JOB" + Utilities.randGen.nextInt());
    }
    String addedArchives = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDARCHIVES);
    // Transfer HIVEADDEDARCHIVES to "tmparchives" so hadoop understands it
    if (StringUtils.isNotBlank(addedArchives)) {
      initializeFiles("tmparchives", addedArchives);
    }

    try {
      MapredLocalWork localwork = mWork.getMapRedLocalWork();
      if (localwork != null && localwork.hasStagedAlias()) {
        if (!ShimLoader.getHadoopShims().isLocalMode(job)) {
          Path localPath = localwork.getTmpPath();
          Path hdfsPath = mWork.getTmpHDFSPath();

          FileSystem hdfs = hdfsPath.getFileSystem(job);
          FileSystem localFS = localPath.getFileSystem(job);
          FileStatus[] hashtableFiles = localFS.listStatus(localPath);
          int fileNumber = hashtableFiles.length;
          String[] fileNames = new String[fileNumber];

          for (int i = 0; i < fileNumber; i++) {
            fileNames[i] = hashtableFiles[i].getPath().getName();
          }

          // package and compress all the hashtable files to an archive file
          String stageId = this.getId();
          String archiveFileName = Utilities.generateTarFileName(stageId);
          localwork.setStageID(stageId);

          CompressionUtils.tar(localPath.toUri().getPath(), fileNames, archiveFileName);
          Path archivePath = Utilities.generateTarPath(localPath, stageId);
          LOG.info("Archive " + hashtableFiles.length + " hash table files to " + archivePath);

          // upload archive file to hdfs
          Path hdfsFilePath = Utilities.generateTarPath(hdfsPath, stageId);
          short replication = (short) job.getInt("mapred.submit.replication", 10);
          hdfs.copyFromLocalFile(archivePath, hdfsFilePath);
          hdfs.setReplication(hdfsFilePath, replication);
          LOG.info("Upload 1 archive file  from" + archivePath + " to: " + hdfsFilePath);

          // add the archive file to distributed cache
          DistributedCache.createSymlink(job);
          DistributedCache.addCacheArchive(hdfsFilePath.toUri(), job);
          LOG.info(
              "Add 1 archive file to distributed cache. Archive file: " + hdfsFilePath.toUri());
        }
      }
      work.configureJobConf(job);
      List<Path> inputPaths = Utilities.getInputPaths(job, mWork, emptyScratchDir, ctx, false);
      Utilities.setInputPaths(job, inputPaths);

      Utilities.setMapRedWork(job, work, ctx.getMRTmpPath());

      if (mWork.getSamplingType() > 0 && rWork != null && job.getNumReduceTasks() > 1) {
        try {
          handleSampling(ctx, mWork, job);
          job.setPartitionerClass(HiveTotalOrderPartitioner.class);
        } catch (IllegalStateException e) {
          console.printInfo("Not enough sampling data.. Rolling back to single reducer task");
          rWork.setNumReduceTasks(1);
          job.setNumReduceTasks(1);
        } catch (Exception e) {
          LOG.error("Sampling error", e);
          console.printError(
              e.toString(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
          rWork.setNumReduceTasks(1);
          job.setNumReduceTasks(1);
        }
      }

      // remove the pwd from the conf file so that the job tracker doesn't show it in its logs
      String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
      if (pwd != null) {
        HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
      }
      JobClient jc = new JobClient(job);
      // make this client wait if job tracker is not behaving well.
      Throttle.checkJobTracker(job, LOG);

      if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) {
        // initialize stats publishing table
        StatsPublisher statsPublisher;
        StatsFactory factory = StatsFactory.newFactory(job);
        if (factory != null) {
          statsPublisher = factory.getStatsPublisher();
          List<String> statsTmpDir = Utilities.getStatsTmpDirs(mWork, job);
          if (rWork != null) {
            statsTmpDir.addAll(Utilities.getStatsTmpDirs(rWork, job));
          }
          StatsCollectionContext sc = new StatsCollectionContext(job);
          sc.setStatsTmpDirs(statsTmpDir);
          if (!statsPublisher.init(sc)) { // creating stats table if not exists
            if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
              throw new HiveException(
                  ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
            }
          }
        }
      }

      Utilities.createTmpDirs(job, mWork);
      Utilities.createTmpDirs(job, rWork);

      SessionState ss = SessionState.get();
      if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
          && ss != null) {
        TezSessionState session = ss.getTezSession();
        TezSessionPoolManager.getInstance().close(session, true);
      }

      // Finally SUBMIT the JOB!
      rj = jc.submitJob(job);
      // replace it back
      if (pwd != null) {
        HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, pwd);
      }

      returnVal = jobExecHelper.progress(rj, jc, ctx.getHiveTxnManager());
      success = (returnVal == 0);
    } catch (Exception e) {
      e.printStackTrace();
      String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
      if (rj != null) {
        mesg = "Ended Job = " + rj.getJobID() + mesg;
      } else {
        mesg = "Job Submission failed" + mesg;
      }

      // Has to use full name to make sure it does not conflict with
      // org.apache.commons.lang.StringUtils
      console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));

      success = false;
      returnVal = 1;
    } finally {
      Utilities.clearWork(job);
      try {
        if (ctxCreated) {
          ctx.clear();
        }

        if (rj != null) {
          if (returnVal != 0) {
            rj.killJob();
          }
          jobID = rj.getID().toString();
        }
      } catch (Exception e) {
        LOG.warn("Failed while cleaning up ", e);
      } finally {
        HadoopJobExecHelper.runningJobs.remove(rj);
      }
    }

    // get the list of Dynamic partition paths
    try {
      if (rj != null) {
        if (mWork.getAliasToWork() != null) {
          for (Operator<? extends OperatorDesc> op : mWork.getAliasToWork().values()) {
            op.jobClose(job, success);
          }
        }
        if (rWork != null) {
          rWork.getReducer().jobClose(job, success);
        }
      }
    } catch (Exception e) {
      // jobClose needs to execute successfully, otherwise fail the task
      if (success) {
        success = false;
        returnVal = 3;
        String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'";
        console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
      }
    }

    return (returnVal);
  }
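The auxJars/addedJars handling in the middle of Example #9 reduces to a small merging rule; restated as a hypothetical helper for readability (not a method that exists in the source):

 static String mergeJarLists(String addedJars, String auxJars) {
   if (StringUtils.isBlank(auxJars)) {
     return addedJars; // only session-added jars (possibly blank)
   }
   if (StringUtils.isBlank(addedJars)) {
     return auxJars; // only configured aux jars
   }
   return addedJars + "," + auxJars; // added jars first, then aux jars
 }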
Example #10
  public void validate() throws SemanticException {

    if ((this.getCols() == null) || (this.getCols().size() == 0)) {
      // for now make sure that serde exists
      if (StringUtils.isEmpty(this.getSerName())
          || !SerDeUtils.shouldGetColsFromSerDe(this.getSerName())) {
        throw new SemanticException(ErrorMsg.INVALID_TBL_DDL_SERDE.getMsg());
      }
      return;
    }

    if (this.getStorageHandler() == null) {
      try {
        Class<?> origin = Class.forName(this.getOutputFormat(), true, JavaUtils.getClassLoader());
        Class<? extends HiveOutputFormat> replaced =
            HiveFileFormatUtils.getOutputFormatSubstitute(origin);
        if (replaced == null) {
          throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg());
        }
      } catch (ClassNotFoundException e) {
        throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg());
      }
    }

    List<String> colNames = ParseUtils.validateColumnNameUniqueness(this.getCols());

    if (this.getBucketCols() != null) {
      // all columns in cluster and sort are valid columns
      Iterator<String> bucketCols = this.getBucketCols().iterator();
      while (bucketCols.hasNext()) {
        String bucketCol = bucketCols.next();
        boolean found = false;
        Iterator<String> colNamesIter = colNames.iterator();
        while (colNamesIter.hasNext()) {
          String colName = colNamesIter.next();
          if (bucketCol.equalsIgnoreCase(colName)) {
            found = true;
            break;
          }
        }
        if (!found) {
          throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg());
        }
      }
    }

    if (this.getSortCols() != null) {
      // all columns in cluster and sort are valid columns
      Iterator<Order> sortCols = this.getSortCols().iterator();
      while (sortCols.hasNext()) {
        String sortCol = sortCols.next().getCol();
        boolean found = false;
        Iterator<String> colNamesIter = colNames.iterator();
        while (colNamesIter.hasNext()) {
          String colName = colNamesIter.next();
          if (sortCol.equalsIgnoreCase(colName)) {
            found = true;
            break;
          }
        }
        if (!found) {
          throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg());
        }
      }
    }

    if (this.getPartCols() != null) {
      // there is no overlap between columns and partitioning columns
      Iterator<FieldSchema> partColsIter = this.getPartCols().iterator();
      while (partColsIter.hasNext()) {
        FieldSchema fs = partColsIter.next();
        String partCol = fs.getName();
        PrimitiveObjectInspectorUtils.PrimitiveTypeEntry pte =
            PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(fs.getType());
        if (null == pte) {
          throw new SemanticException(
              ErrorMsg.PARTITION_COLUMN_NON_PRIMITIVE.getMsg()
                  + " Found "
                  + partCol
                  + " of type: "
                  + fs.getType());
        }
        Iterator<String> colNamesIter = colNames.iterator();
        while (colNamesIter.hasNext()) {
          String colName = BaseSemanticAnalyzer.unescapeIdentifier(colNamesIter.next());
          if (partCol.equalsIgnoreCase(colName)) {
            throw new SemanticException(ErrorMsg.COLUMN_REPEATED_IN_PARTITIONING_COLS.getMsg());
          }
        }
      }
    }

    /* Validate skewed information. */
    ValidationUtility.validateSkewedInformation(
        colNames, this.getSkewedColNames(), this.getSkewedColValues());
  }
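The bucket-column and sort-column loops above both implement the same case-insensitive membership test; a compact equivalent, written as a hypothetical helper rather than the source's own code:

 static void requireColumns(List<String> requested, List<String> colNames)
     throws SemanticException {
   for (String wanted : requested) {
     boolean found = colNames.stream().anyMatch(c -> c.equalsIgnoreCase(wanted));
     if (!found) {
       throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg());
     }
   }
 }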