Example #1
  public static void main(String[] args) throws Exception {
    Thread.setDefaultUncaughtExceptionHandler(new LlapDaemonUncaughtExceptionHandler());
    LlapDaemon llapDaemon = null;
    try {
      // Cache settings will need to be set up in llap-daemon-site.xml, since the daemons don't
      // read hive-site.xml.
      // Ideally, these properties should be part of LlapDaemonConf rather than HiveConf.
      LlapConfiguration daemonConf = new LlapConfiguration();
      int numExecutors =
          daemonConf.getInt(
              LlapConfiguration.LLAP_DAEMON_NUM_EXECUTORS,
              LlapConfiguration.LLAP_DAEMON_NUM_EXECUTORS_DEFAULT);

      String[] localDirs = daemonConf.getTrimmedStrings(LlapConfiguration.LLAP_DAEMON_WORK_DIRS);
      int rpcPort =
          daemonConf.getInt(
              LlapConfiguration.LLAP_DAEMON_RPC_PORT,
              LlapConfiguration.LLAP_DAEMON_RPC_PORT_DEFAULT);
      int shufflePort =
          daemonConf.getInt(
              ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, ShuffleHandler.DEFAULT_SHUFFLE_PORT);
      long executorMemoryBytes =
          daemonConf.getInt(
                  LlapConfiguration.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB,
                  LlapConfiguration.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB_DEFAULT)
              * 1024L
              * 1024L;
      long cacheMemoryBytes =
          HiveConf.getLongVar(daemonConf, HiveConf.ConfVars.LLAP_ORC_CACHE_MAX_SIZE);
      boolean isDirectCache =
          HiveConf.getBoolVar(daemonConf, HiveConf.ConfVars.LLAP_ORC_CACHE_ALLOCATE_DIRECT);
      boolean llapIoEnabled = HiveConf.getBoolVar(daemonConf, HiveConf.ConfVars.LLAP_IO_ENABLED);
      llapDaemon =
          new LlapDaemon(
              daemonConf,
              numExecutors,
              executorMemoryBytes,
              llapIoEnabled,
              isDirectCache,
              cacheMemoryBytes,
              localDirs,
              rpcPort,
              shufflePort);

      LOG.info("Adding shutdown hook for LlapDaemon");
      ShutdownHookManager.addShutdownHook(new CompositeServiceShutdownHook(llapDaemon), 1);

      llapDaemon.init(daemonConf);
      llapDaemon.start();
      LOG.info("Started LlapDaemon");
      // Relying on the RPC threads to keep the service alive.
    } catch (Throwable t) {
      // TODO Replace this with a ExceptionHandler / ShutdownHook
      LOG.warn("Failed to start LLAP Daemon with exception", t);
      if (llapDaemon != null) {
        llapDaemon.shutdown();
      }
      System.exit(-1);
    }
  }
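The main() above follows a common pattern: read sizing and port settings from a Hadoop Configuration with explicit defaults, converting MB values to bytes. Below is a minimal, self-contained sketch of that pattern using plain org.apache.hadoop.conf.Configuration; the "example.daemon.*" keys, defaults, and class name are hypothetical placeholders, not real LLAP properties.

import org.apache.hadoop.conf.Configuration;

public class DaemonConfSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Hypothetical keys, for illustration only.
    int numExecutors = conf.getInt("example.daemon.num.executors", 4);
    int rpcPort = conf.getInt("example.daemon.rpc.port", 15001);
    String[] workDirs = conf.getTrimmedStrings("example.daemon.work.dirs");
    // Memory is typically configured in MB and converted to bytes.
    long executorMemoryBytes =
        conf.getInt("example.daemon.memory.per.instance.mb", 4096) * 1024L * 1024L;
    System.out.println("executors=" + numExecutors + ", rpcPort=" + rpcPort
        + ", memoryBytes=" + executorMemoryBytes + ", workDirs=" + workDirs.length);
  }
}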
Example #2
  /**
   * Add the StatsTask as a dependent task of the MoveTask because StatsTask will change the
   * Table/Partition metadata. For atomicity, we should not change it before the data is actually
   * there done by MoveTask.
   *
   * @param nd the FileSinkOperator whose results are taken care of by the MoveTask.
   * @param mvTask The MoveTask that moves the FileSinkOperator's results.
   * @param currTask The MapRedTask that the FileSinkOperator belongs to.
   * @param hconf HiveConf
   */
  private void addStatsTask(
      FileSinkOperator nd, MoveTask mvTask, Task<? extends Serializable> currTask, HiveConf hconf) {

    MoveWork mvWork = ((MoveTask) mvTask).getWork();
    StatsWork statsWork = null;
    if (mvWork.getLoadTableWork() != null) {
      statsWork = new StatsWork(mvWork.getLoadTableWork());
    } else if (mvWork.getLoadFileWork() != null) {
      statsWork = new StatsWork(mvWork.getLoadFileWork());
    }
    assert statsWork != null : "Error when generating StatsTask";
    statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
    MapredWork mrWork = (MapredWork) currTask.getWork();

    // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix
    // in FileSinkDesc is used for stats publishing. They should be consistent.
    statsWork.setAggKey(((FileSinkOperator) nd).getConf().getStatsAggPrefix());
    Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf);

    // mark the MapredWork and FileSinkOperator for gathering stats
    nd.getConf().setGatherStats(true);
    mrWork.setGatheringStats(true);
    nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
    nd.getConf()
        .setMaxStatsKeyPrefixLength(hconf.getIntVar(ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH));
    // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName());

    // subscribe feeds from the MoveTask so that MoveTask can forward the list
    // of dynamic partitions to the StatsTask
    mvTask.addDependentTask(statsTask);
    statsTask.subscribeFeed(mvTask);
  }
 public ThriftHttpServlet(
     TProcessor processor,
     TProtocolFactory protocolFactory,
     String authType,
     UserGroupInformation serviceUGI,
     UserGroupInformation httpUGI,
     HiveAuthFactory hiveAuthFactory) {
   super(processor, protocolFactory);
   this.authType = authType;
   this.serviceUGI = serviceUGI;
   this.httpUGI = httpUGI;
   this.hiveAuthFactory = hiveAuthFactory;
   this.isCookieAuthEnabled =
       hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_AUTH_ENABLED);
   // Initialize the cookie based authentication related variables.
   if (isCookieAuthEnabled) {
     // Generate the signer with secret.
     String secret = Long.toString(RAN.nextLong());
     LOG.debug("Using the random number as the secret for cookie generation " + secret);
     this.signer = new CookieSigner(secret.getBytes());
     this.cookieMaxAge =
         (int)
             hiveConf.getTimeVar(
                 ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_MAX_AGE, TimeUnit.SECONDS);
     this.cookieDomain = hiveConf.getVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_DOMAIN);
     this.cookiePath = hiveConf.getVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_PATH);
     // always send secure cookies for SSL mode
     this.isCookieSecure = hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_USE_SSL);
     this.isHttpOnlyCookie =
         hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_IS_HTTPONLY);
   }
 }
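For context, the cookie attributes read above (max age, domain, path, secure, http-only) are the standard javax.servlet cookie settings. The sketch below shows how such attributes would typically be applied to a servlet Cookie; it is illustrative only and not the actual ThriftHttpServlet code (the class and helper names are made up, and setHttpOnly requires Servlet 3.0+).

import javax.servlet.http.Cookie;

public class CookieSketch {
  /** Build a session cookie with attributes like the ones read from HiveConf above. */
  static Cookie makeSessionCookie(String name, String value, int maxAgeSecs,
      String domain, String path, boolean secure, boolean httpOnly) {
    Cookie cookie = new Cookie(name, value);
    cookie.setMaxAge(maxAgeSecs);   // e.g. the configured cookie max age, in seconds
    if (domain != null) {
      cookie.setDomain(domain);
    }
    if (path != null) {
      cookie.setPath(path);
    }
    cookie.setSecure(secure);       // send only over SSL when enabled
    cookie.setHttpOnly(httpOnly);   // hide the cookie from client-side scripts
    return cookie;
  }
}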
  /**
   * File Sink Operator encountered.
   *
   * @param nd the file sink operator encountered
   * @param opProcCtx context
   */
  public Object process(
      Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs)
      throws SemanticException {
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    boolean chDir = false;
    Task<? extends Serializable> currTask = ctx.getCurrTask();
    FileSinkOperator fsOp = (FileSinkOperator) nd;
    boolean isInsertTable = // is INSERT OVERWRITE TABLE
        fsOp.getConf().getTableInfo().getTableName() != null
            && parseCtx.getQB().getParseInfo().isInsertToTable();
    HiveConf hconf = parseCtx.getConf();

    // Has the user enabled merging of files for map-only jobs or for all jobs
    if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) {
      List<Task<? extends Serializable>> mvTasks = ctx.getMvTask();

      // In case of unions or map-joins, it is possible that the file has
      // already been seen.
      // So, no need to attempt to merge the files again.
      if ((ctx.getSeenFileSinkOps() == null) || (!ctx.getSeenFileSinkOps().contains(nd))) {

        // no merging is needed if the move is to a local file system
        MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, fsOp);

        if (isInsertTable && hconf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
          addStatsTask(fsOp, mvTask, currTask, parseCtx.getConf());
        }

        if ((mvTask != null) && !mvTask.isLocal()) {
          // There are separate configuration parameters to control whether to
          // merge for a map-only job
          // or for a map-reduce job
          MapredWork currWork = (MapredWork) currTask.getWork();
          boolean mergeMapOnly =
              hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES)
                  && currWork.getReducer() == null;
          boolean mergeMapRed =
              hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES)
                  && currWork.getReducer() != null;
          if (mergeMapOnly || mergeMapRed) {
            chDir = true;
          }
        }
      }
    }

    String finalName = processFS(nd, stack, opProcCtx, chDir);

    // need to merge the files in the destination table/partitions
    if (chDir && (finalName != null)) {
      createMergeJob((FileSinkOperator) nd, ctx, finalName);
    }

    return null;
  }
 /**
  * create a new plan and return.
  *
  * @return the new plan
  */
 public static MapredWork getMapRedWork(HiveConf conf) {
   MapredWork work = new MapredWork();
   // This code has been only added for testing
   boolean mapperCannotSpanPartns =
       conf.getBoolVar(HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
   work.setMapperCannotSpanPartns(mapperCannotSpanPartns);
   work.setPathToAliases(new LinkedHashMap<String, ArrayList<String>>());
   work.setPathToPartitionInfo(new LinkedHashMap<String, PartitionDesc>());
   work.setAliasToWork(new LinkedHashMap<String, Operator<? extends Serializable>>());
   work.setTagToValueDesc(new ArrayList<TableDesc>());
   work.setReducer(null);
   work.setHadoopSupportsSplittable(
       conf.getBoolVar(HiveConf.ConfVars.HIVE_COMBINE_INPUT_FORMAT_SUPPORTS_SPLITTABLE));
   return work;
 }
Example #6
  public int processLine(String line) {
    int lastRet = 0, ret = 0;

    String command = "";
    for (String oneCmd : line.split(";")) {

      if (StringUtils.endsWith(oneCmd, "\\")) {
        command += StringUtils.chop(oneCmd) + ";";
        continue;
      } else {
        command += oneCmd;
      }
      if (StringUtils.isBlank(command)) continue;

      try {
        ret = processCmd(command);
      } catch (TException e) {
        e.printStackTrace();
      } catch (HiveException e) {
        e.printStackTrace();
      }
      command = "";
      lastRet = ret;
      boolean ignoreErrors = HiveConf.getBoolVar(conf, HiveConf.ConfVars.CLIIGNOREERRORS);
      if (ret != 0 && !ignoreErrors) {
        return ret;
      }
    }
    return lastRet;
  }
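The loop above splits a line on ';' while treating a trailing backslash as an escaped semicolon that belongs to the current command. A standalone sketch of just that splitting logic, using only java.lang so it can be run without the Hive CLI (the class and method names are made up):

import java.util.ArrayList;
import java.util.List;

public class LineSplitSketch {
  /** Split a line on ';', treating a trailing backslash as an escaped semicolon. */
  static List<String> splitCommands(String line) {
    List<String> commands = new ArrayList<>();
    StringBuilder command = new StringBuilder();
    for (String oneCmd : line.split(";")) {
      if (oneCmd.endsWith("\\")) {
        // Drop the backslash and keep the ';' as part of the command.
        command.append(oneCmd, 0, oneCmd.length() - 1).append(';');
        continue;
      }
      command.append(oneCmd);
      if (command.toString().trim().isEmpty()) {
        command.setLength(0);
        continue;
      }
      commands.add(command.toString());
      command.setLength(0);
    }
    return commands;
  }

  public static void main(String[] args) {
    // "a\;b" stays one command; "c" is a second one.
    System.out.println(splitCommands("a\\;b;c")); // prints [a;b, c]
  }
}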
 public boolean getIsSilent() {
   if (conf != null) {
     return conf.getBoolVar(HiveConf.ConfVars.HIVESESSIONSILENT);
   } else {
     return isSilent;
   }
 }
Example #8
  public RowSet getOperationLogRowSet(
      OperationHandle opHandle, FetchOrientation orientation, long maxRows, HiveConf hConf)
      throws HiveSQLException {
    TableSchema tableSchema = new TableSchema(getLogSchema());
    RowSet rowSet = RowSetFactory.create(tableSchema, getOperation(opHandle).getProtocolVersion());

    if (!hConf.getBoolVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) {
      LOG.warn(
          "Try to get operation log when hive.server2.logging.operation.enabled is false, no log will be returned. ");
      return rowSet;
    }
    // get the OperationLog object from the operation
    OperationLog operationLog = getOperation(opHandle).getOperationLog();
    if (operationLog == null) {
      throw new HiveSQLException("Couldn't find log associated with operation handle: " + opHandle);
    }

    // read logs
    List<String> logs;
    try {
      logs = operationLog.readOperationLog(isFetchFirst(orientation), maxRows);
    } catch (SQLException e) {
      throw new HiveSQLException(e.getMessage(), e.getCause());
    }

    // convert logs to RowSet
    for (String log : logs) {
      rowSet.addRow(new String[] {log});
    }

    return rowSet;
  }
 public void closeSession(SessionHandle sessionHandle) throws HiveSQLException {
   HiveSession session = handleToSession.remove(sessionHandle);
   if (session == null) {
     throw new HiveSQLException("Session does not exist!");
   }
   session.close();
   // Shutdown HiveServer2 if it has been deregistered from ZooKeeper and has no active sessions
   if (!(hiveServer2 == null)
       && (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_SUPPORT_DYNAMIC_SERVICE_DISCOVERY))
       && (!hiveServer2.isRegisteredWithZooKeeper())) {
     // Asynchronously shutdown this instance of HiveServer2,
     // if there are no active client sessions
     if (getOpenSessionCount() == 0) {
       LOG.info(
           "This instance of HiveServer2 has been removed from the list of server "
               + "instances available for dynamic service discovery. "
               + "The last client session has ended - will shutdown now.");
       Thread shutdownThread =
           new Thread() {
             @Override
             public void run() {
               hiveServer2.stop();
             }
           };
       shutdownThread.start();
     }
   }
 }
Example #10
 /** Update the options after connection is established in CLI mode. */
 public void updateBeeLineOptsFromConf() {
   if (!beeLine.isBeeLine()) {
     if (conf == null) {
       conf = beeLine.getCommands().getHiveConf(false);
     }
     setForce(HiveConf.getBoolVar(conf, HiveConf.ConfVars.CLIIGNOREERRORS));
   }
 }
Example #11
 public boolean getShowHeader() {
   if (beeLine.isBeeLine()) {
     return showHeader;
   } else {
     boolean header;
     HiveConf conf = beeLine.getCommands().getHiveConf(true);
     header = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CLI_PRINT_HEADER);
     return header;
   }
 }
 @Override
 public synchronized void init(HiveConf hiveConf) {
   this.hiveConf = hiveConf;
   // Create operation log root directory, if operation logging is enabled
   if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) {
     initOperationLogRootDir();
   }
   createBackgroundOperationPool();
   addService(operationManager);
   super.init(hiveConf);
 }
Example #13
  /*
   * Helper function to create a JobConf for a specific ReduceWork.
   */
  private JobConf initializeVertexConf(JobConf baseConf, ReduceWork reduceWork) {
    JobConf conf = new JobConf(baseConf);

    conf.set("mapred.reducer.class", ExecReducer.class.getName());

    boolean useSpeculativeExecReducers =
        HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
    HiveConf.setBoolVar(
        conf, HiveConf.ConfVars.HADOOPSPECULATIVEEXECREDUCERS, useSpeculativeExecReducers);

    return conf;
  }
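The helper above clones the base JobConf and mirrors a Hive-level boolean onto the corresponding Hadoop-level key. Below is a minimal sketch of that clone-and-copy pattern with plain Hadoop APIs; the key names and class name are illustrative placeholders, not the real ConfVars entries.

import org.apache.hadoop.mapred.JobConf;

public class VertexConfSketch {
  /** Clone a base conf and mirror a boolean from one key onto another. */
  static JobConf vertexConf(JobConf baseConf) {
    JobConf conf = new JobConf(baseConf);  // copy constructor; baseConf stays untouched
    // Placeholder keys, for illustration only.
    boolean speculative = conf.getBoolean("example.hive.speculative.reducers", true);
    conf.setBoolean("example.hadoop.speculative.reducers", speculative);
    return conf;
  }
}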
Example #14
  /**
   * Create a vertex from a given work object.
   *
   * @param conf JobConf to be used to this execution unit
   * @param work The instance of BaseWork representing the actual work to be performed by this
   *     vertex.
   * @param scratchDir HDFS scratch dir for this execution unit.
   * @param appJarLr Local resource for hive-exec.
   * @param additionalLr
   * @param fileSystem FS corresponding to scratchDir and LocalResources
   * @param ctx This query's context
   * @return Vertex
   */
  public Vertex createVertex(
      JobConf conf,
      BaseWork work,
      Path scratchDir,
      LocalResource appJarLr,
      List<LocalResource> additionalLr,
      FileSystem fileSystem,
      Context ctx,
      boolean hasChildren,
      TezWork tezWork)
      throws Exception {

    Vertex v = null;
    // simply dispatch the call to the right method for the actual (sub-) type of
    // BaseWork.
    if (work instanceof MapWork) {
      v =
          createVertex(
              conf, (MapWork) work, appJarLr, additionalLr, fileSystem, scratchDir, ctx, tezWork);
    } else if (work instanceof ReduceWork) {
      v =
          createVertex(
              conf, (ReduceWork) work, appJarLr, additionalLr, fileSystem, scratchDir, ctx);
    } else {
      // something is seriously wrong if this is happening
      throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg());
    }

    // initialize stats publisher if necessary
    if (work.isGatheringStats()) {
      StatsPublisher statsPublisher;
      StatsFactory factory = StatsFactory.newFactory(conf);
      if (factory != null) {
        statsPublisher = factory.getStatsPublisher();
        if (!statsPublisher.init(conf)) { // creating stats table if not exists
          if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
            throw new HiveException(
                ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
          }
        }
      }
    }

    // final vertices need to have at least one output
    if (!hasChildren) {
      v.addOutput(
          "out_" + work.getName(),
          new OutputDescriptor(MROutput.class.getName())
              .setUserPayload(MRHelpers.createUserPayloadFromConf(conf)));
    }

    return v;
  }
Example #15
 public static boolean checkAndSetAsyncLogging(final Configuration conf) {
   final boolean asyncLogging = HiveConf.getBoolVar(conf, ConfVars.HIVE_ASYNC_LOG_ENABLED);
   if (asyncLogging) {
     System.setProperty(
         "Log4jContextSelector", "org.apache.logging.log4j.core.async.AsyncLoggerContextSelector");
     // default is ClassLoaderContextSelector which is created during automatic logging
     // initialization in a static initialization block.
     // Changing ContextSelector at runtime requires creating new context factory which will
     // internally create new context selector based on system property.
     LogManager.setFactory(new Log4jContextFactory());
   }
   return asyncLogging;
 }
 public SessionState(HiveConf conf, String userName) {
   this.conf = conf;
   this.userName = userName;
   isSilent = conf.getBoolVar(HiveConf.ConfVars.HIVESESSIONSILENT);
   ls = new LineageState();
   // Must be deterministic order map for consistent q-test output across Java versions
   overriddenConfigurations = new LinkedHashMap<String, String>();
   overriddenConfigurations.putAll(HiveConf.getConfSystemProperties());
   // if there isn't already a session name, go ahead and create it.
   if (StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HIVESESSIONID))) {
     conf.setVar(HiveConf.ConfVars.HIVESESSIONID, makeSessionId());
   }
   parentLoader = JavaUtils.getClassLoader();
 }
Example #17
 @Override
 public synchronized void init(HiveConf hiveConf) {
   if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) {
     initOperationLogCapture(
         hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LEVEL));
   } else {
     LOG.debug("Operation level logging is turned off");
   }
   if (hiveConf.isWebUiQueryInfoCacheEnabled()) {
     historicSqlOperations =
         new SQLOperationDisplayCache(
             hiveConf.getIntVar(ConfVars.HIVE_SERVER2_WEBUI_MAX_HISTORIC_QUERIES));
   }
   super.init(hiveConf);
 }
Example #18
    private void generateJDOFilterOverPartitions(
        Configuration conf, Table table, Map<String, Object> params, FilterBuilder filterBuilder)
        throws MetaException {
      int partitionColumnCount = table.getPartitionKeys().size();
      int partitionColumnIndex = getPartColIndexForFilter(table, filterBuilder);
      if (filterBuilder.hasError()) return;

      boolean canPushDownIntegral =
          HiveConf.getBoolVar(conf, HiveConf.ConfVars.METASTORE_INTEGER_JDO_PUSHDOWN);
      String valueAsString =
          getJdoFilterPushdownParam(
              table, partitionColumnIndex, filterBuilder, canPushDownIntegral);
      if (filterBuilder.hasError()) return;

      String paramName = PARAM_PREFIX + params.size();
      params.put(paramName, valueAsString);

      boolean isOpEquals = operator == Operator.EQUALS;
      if (isOpEquals || operator == Operator.NOTEQUALS || operator == Operator.NOTEQUALS2) {
        makeFilterForEquals(
            keyName,
            valueAsString,
            paramName,
            params,
            partitionColumnIndex,
            partitionColumnCount,
            isOpEquals,
            filterBuilder);
        return;
      }
      // get the value for a partition key from MPartition.values (PARTITION_KEY_VALUES)
      String valString = "values.get(" + partitionColumnIndex + ")";

      if (operator == Operator.LIKE) {
        if (isReverseOrder) {
          // For LIKE, the value should be on the RHS.
          filterBuilder.setError(
              "Value should be on the RHS for LIKE operator : Key <" + keyName + ">");
        }
        // TODO: in all likelihood, this won't actually work. Keep it for backward compat.
        filterBuilder.append(" " + valString + "." + operator.getJdoOp() + "(" + paramName + ") ");
      } else {
        filterBuilder.append(
            isReverseOrder
                ? paramName + " " + operator.getJdoOp() + " " + valString
                : " " + valString + " " + operator.getJdoOp() + " " + paramName);
      }
    }
  /**
   * Test setting {@link HiveConf.ConfVars} config parameter HIVE_SERVER2_ENABLE_DOAS for unsecure
   * mode.
   */
  public void testDoAsSetting() {

    HiveConf hconf = new HiveConf();
    assertTrue(
        "default value of hive server2 doAs should be true",
        hconf.getBoolVar(ConfVars.HIVE_SERVER2_ENABLE_DOAS));

    CLIService cliService = new CLIService();
    cliService.init(hconf);
    ThriftCLIService tcliService = new ThriftCLIService(cliService);
    tcliService.init(hconf);
    TProcessorFactory procFactory = PlainSaslHelper.getPlainProcessorFactory(tcliService);
    assertEquals(
        "doAs enabled processor for unsecure mode",
        procFactory.getProcessor(null).getClass(),
        TUGIContainingProcessor.class);
  }
  /**
   * Create a MapReduce job for a particular partition if Hadoop version is pre 0.20, otherwise
   * create a Map-only job using CombineHiveInputFormat for all partitions.
   *
   * @param fsOp The FileSink operator.
   * @param ctx The MR processing context.
   * @param finalName the final destination path the merge job should output.
   * @throws SemanticException
   */
  private void createMergeJob(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName)
      throws SemanticException {

    // if the hadoop version supports CombineFileInputFormat (version >= 0.20),
    // create a Map-only job for merge, otherwise create a MapReduce merge job.
    ParseContext parseCtx = ctx.getParseCtx();
    HiveConf conf = parseCtx.getConf();
    if (conf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPONLY)
        && Utilities.supportCombineFileInputFormat()) {
      // create Map-only merge job
      createMap4Merge(fsOp, ctx, finalName);
      LOG.info("use CombineHiveInputformat for the merge job");
    } else {
      createMapReduce4Merge(fsOp, ctx, finalName);
      LOG.info("use HiveInputFormat for the merge job");
    }
  }
Example #21
  public static SplitLocationProvider getSplitLocationProvider(Configuration conf, Logger LOG)
      throws IOException {
    boolean useCustomLocations =
        HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS);
    SplitLocationProvider splitLocationProvider;
    LOG.info("SplitGenerator using llap affinitized locations: " + useCustomLocations);
    if (useCustomLocations) {
      LlapRegistryService serviceRegistry;
      serviceRegistry = LlapRegistryService.getClient(conf);

      Collection<ServiceInstance> serviceInstances =
          serviceRegistry.getInstances().getAllInstancesOrdered(true);
      ArrayList<String> locations = new ArrayList<>(serviceInstances.size());
      for (ServiceInstance serviceInstance : serviceInstances) {
        if (LOG.isDebugEnabled()) {
          LOG.debug(
              "Adding "
                  + serviceInstance.getWorkerIdentity()
                  + " with hostname="
                  + serviceInstance.getHost()
                  + " to list for split locations");
        }
        locations.add(serviceInstance.getHost());
      }
      splitLocationProvider = new HostAffinitySplitLocationProvider(locations);
    } else {
      splitLocationProvider =
          new SplitLocationProvider() {
            @Override
            public String[] getLocations(InputSplit split) throws IOException {
              if (split == null) {
                return null;
              }
              String[] locations = split.getLocations();
              if (locations != null && locations.length == 1) {
                if ("localhost".equals(locations[0])) {
                  return ArrayUtils.EMPTY_STRING_ARRAY;
                }
              }
              return locations;
            }
          };
    }
    return splitLocationProvider;
  }
Example #22
 private FunctionInfo getQualifiedFunctionInfoUnderLock(String qualifiedName)
     throws SemanticException {
   FunctionInfo info = mFunctions.get(qualifiedName);
   if (info != null && info.isBlockedFunction()) {
     throw new SemanticException("UDF " + qualifiedName + " is not allowed");
   }
   if (!isNative && info != null && info.isDiscarded()) {
     // the persistent function is discarded. try reload
     mFunctions.remove(qualifiedName);
     return null;
   }
   // HIVE-6672: In HiveServer2 the JARs for this UDF may have been loaded by a different thread,
   // and the current thread may not be able to resolve the UDF. Test for this condition
   // and if necessary load the JARs in this thread.
   if (isNative && info != null && info.isPersistent()) {
     return registerToSessionRegistry(qualifiedName, info);
   }
   if (info != null || !isNative) {
     return info; // We have the UDF, or we are in the session registry (or both).
   }
   // If we are in the system registry and this feature is enabled, try to get it from metastore.
   SessionState ss = SessionState.get();
   HiveConf conf = (ss == null) ? null : ss.getConf();
   if (conf == null || !HiveConf.getBoolVar(conf, ConfVars.HIVE_ALLOW_UDF_LOAD_ON_DEMAND)) {
     return null;
   }
   // This is a little bit weird. We'll do the MS call outside of the lock. Our caller calls us
   // under lock, so we'd preserve the lock state for them; their finally block will release the
   // lock correctly. See the comment on the lock field - the locking needs to be reworked.
   lock.unlock();
   try {
     return getFunctionInfoFromMetastoreNoLock(qualifiedName, conf);
   } finally {
     lock.lock();
   }
 }
Example #23
  private LlapIoImpl(Configuration conf) throws IOException {
    String ioMode = HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_IO_MEMORY_MODE);
    boolean useLowLevelCache = LlapIoImpl.MODE_CACHE.equalsIgnoreCase(ioMode),
        useAllocOnly = !useLowLevelCache && LlapIoImpl.MODE_ALLOCATOR.equalsIgnoreCase(ioMode);
    LOG.info("Initializing LLAP IO in {} mode", ioMode);

    String displayName = "LlapDaemonCacheMetrics-" + MetricsUtils.getHostName();
    String sessionId = conf.get("llap.daemon.metrics.sessionid");
    this.cacheMetrics = LlapDaemonCacheMetrics.create(displayName, sessionId);

    displayName = "LlapDaemonQueueMetrics-" + MetricsUtils.getHostName();
    int[] intervals =
        conf.getInts(String.valueOf(HiveConf.ConfVars.LLAP_QUEUE_METRICS_PERCENTILE_INTERVALS));
    this.queueMetrics = LlapDaemonQueueMetrics.create(displayName, sessionId, intervals);

    LOG.info(
        "Started llap daemon metrics with displayName: {} sessionId: {}", displayName, sessionId);

    OrcMetadataCache metadataCache = null;
    LowLevelCacheImpl orcCache = null;
    BufferUsageManager bufferManager = null;
    if (useLowLevelCache) {
      // Memory manager uses cache policy to trigger evictions, so create the policy first.
      boolean useLrfu = HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_USE_LRFU);
      LowLevelCachePolicy cachePolicy =
          useLrfu ? new LowLevelLrfuCachePolicy(conf) : new LowLevelFifoCachePolicy(conf);
      // Allocator uses memory manager to request memory, so create the manager next.
      LowLevelCacheMemoryManager memManager =
          new LowLevelCacheMemoryManager(conf, cachePolicy, cacheMetrics);
      // Cache uses allocator to allocate and deallocate, create allocator and then caches.
      EvictionAwareAllocator allocator = new BuddyAllocator(conf, memManager, cacheMetrics);
      this.allocator = allocator;
      orcCache = new LowLevelCacheImpl(cacheMetrics, cachePolicy, allocator, true);
      metadataCache = new OrcMetadataCache(memManager, cachePolicy);
      // And finally cache policy uses cache to notify it of eviction. The cycle is complete!
      cachePolicy.setEvictionListener(new EvictionDispatcher(orcCache, metadataCache));
      cachePolicy.setParentDebugDumper(orcCache);
      orcCache.init(); // Start the cache threads.
      bufferManager = orcCache; // Cache also serves as buffer manager.
    } else {
      if (useAllocOnly) {
        LowLevelCacheMemoryManager memManager =
            new LowLevelCacheMemoryManager(conf, null, cacheMetrics);
        allocator = new BuddyAllocator(conf, memManager, cacheMetrics);
      } else {
        allocator = new SimpleAllocator(conf);
      }
      bufferManager = new SimpleBufferManager(allocator, cacheMetrics);
    }
    // IO thread pool. Listening is used for unhandled errors for now (TODO: remove?)
    int numThreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.LLAP_IO_THREADPOOL_SIZE);
    executor =
        MoreExecutors.listeningDecorator(
            Executors.newFixedThreadPool(
                numThreads,
                new ThreadFactoryBuilder()
                    .setNameFormat("IO-Elevator-Thread-%d")
                    .setDaemon(true)
                    .build()));

    // TODO: this should depend on the input format and be in a map, or something.
    this.cvp =
        new OrcColumnVectorProducer(
            metadataCache, orcCache, bufferManager, conf, cacheMetrics, queueMetrics);
    LOG.info("LLAP IO initialized");

    registerMXBeans();
  }
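The IO thread pool above is a standard Guava construction: a fixed-size pool of named daemon threads wrapped in a listening decorator so callers can attach completion callbacks. Below is a self-contained sketch of just that part, assuming Guava is on the classpath; the class name, pool size, and main() usage are illustrative.

import java.util.concurrent.Executors;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

public class IoPoolSketch {
  /** Fixed-size daemon thread pool with named threads, wrapped for listenable futures. */
  static ListeningExecutorService newIoPool(int numThreads) {
    return MoreExecutors.listeningDecorator(
        Executors.newFixedThreadPool(
            numThreads,
            new ThreadFactoryBuilder()
                .setNameFormat("IO-Elevator-Thread-%d")
                .setDaemon(true)
                .build()));
  }

  public static void main(String[] args) {
    ListeningExecutorService pool = newIoPool(4);
    pool.submit(() -> System.out.println("running on " + Thread.currentThread().getName()));
    pool.shutdown();
  }
}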
Example #24
  /**
   * File Sink Operator encountered.
   *
   * @param nd the file sink operator encountered
   * @param opProcCtx context
   */
  public Object process(
      Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs)
      throws SemanticException {
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    boolean chDir = false;
    Task<? extends Serializable> currTask = ctx.getCurrTask();
    FileSinkOperator fsOp = (FileSinkOperator) nd;
    boolean isInsertTable = // is INSERT OVERWRITE TABLE
        fsOp.getConf().getTableInfo().getTableName() != null
            && parseCtx.getQB().getParseInfo().isInsertToTable();
    HiveConf hconf = parseCtx.getConf();

    // Mark this task as a final map reduce task (ignoring the optional merge task)
    ((MapredWork) currTask.getWork()).setFinalMapRed(true);

    // If this file sink desc has been processed due to a linked file sink desc,
    // use that task
    Map<FileSinkDesc, Task<? extends Serializable>> fileSinkDescs = ctx.getLinkedFileDescTasks();
    if (fileSinkDescs != null) {
      Task<? extends Serializable> childTask = fileSinkDescs.get(fsOp.getConf());
      processLinkedFileDesc(ctx, childTask);
      return null;
    }

    // Has the user enabled merging of files for map-only jobs or for all jobs
    if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) {
      List<Task<MoveWork>> mvTasks = ctx.getMvTask();

      // In case of unions or map-joins, it is possible that the file has
      // already been seen.
      // So, no need to attempt to merge the files again.
      if ((ctx.getSeenFileSinkOps() == null) || (!ctx.getSeenFileSinkOps().contains(nd))) {

        // no merging is needed if the move is to a local file system
        MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, fsOp);

        if (isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)) {
          addStatsTask(fsOp, mvTask, currTask, parseCtx.getConf());
        }

        if ((mvTask != null) && !mvTask.isLocal() && fsOp.getConf().canBeMerged()) {
          if (fsOp.getConf().isLinkedFileSink()) {
            // If the user has HIVEMERGEMAPREDFILES set to false, the assumption was that the
            // number of reducers is small, so the number of files is small anyway.
            // However, with this optimization we may increase the number of files
            // by a big margin, so merge aggressively.
            if (hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES)
                || hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES)) {
              chDir = true;
            }
          } else {
            // There are separate configuration parameters to control whether to
            // merge for a map-only job
            // or for a map-reduce job
            MapredWork currWork = (MapredWork) currTask.getWork();
            boolean mergeMapOnly =
                hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && currWork.getReducer() == null;
            boolean mergeMapRed =
                hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) && currWork.getReducer() != null;
            if (mergeMapOnly || mergeMapRed) {
              chDir = true;
            }
          }
        }
      }
    }

    String finalName = processFS(fsOp, stack, opProcCtx, chDir);

    if (chDir) {
      // Merge the files in the destination table/partitions by creating Map-only merge job
      // If underlying data is RCFile or OrcFile a BlockMerge task would be created.
      LOG.info("using CombineHiveInputformat for the merge job");
      createMRWorkForMergingFiles(fsOp, ctx, finalName);
    }

    FileSinkDesc fileSinkDesc = fsOp.getConf();
    if (fileSinkDesc.isLinkedFileSink()) {
      Map<FileSinkDesc, Task<? extends Serializable>> linkedFileDescTasks =
          ctx.getLinkedFileDescTasks();
      if (linkedFileDescTasks == null) {
        linkedFileDescTasks = new HashMap<FileSinkDesc, Task<? extends Serializable>>();
        ctx.setLinkedFileDescTasks(linkedFileDescTasks);
      }

      // The child tasks may be null in case of a select
      if ((currTask.getChildTasks() != null) && (currTask.getChildTasks().size() == 1)) {
        for (FileSinkDesc fileDesc : fileSinkDesc.getLinkedFileSinkDesc()) {
          linkedFileDescTasks.put(fileDesc, currTask.getChildTasks().get(0));
        }
      }
    }

    return null;
  }
Example #25
  /*
   * Helper function to create Vertex from MapWork.
   */
  private Vertex createVertex(
      JobConf conf,
      MapWork mapWork,
      LocalResource appJarLr,
      List<LocalResource> additionalLr,
      FileSystem fs,
      Path mrScratchDir,
      Context ctx,
      TezWork tezWork)
      throws Exception {

    Path tezDir = getTezDir(mrScratchDir);

    // set up the operator plan
    Utilities.setMapWork(conf, mapWork, mrScratchDir, false);

    // create the directories FileSinkOperators need
    Utilities.createTmpDirs(conf, mapWork);

    // Tez asks us to call this even if there's no preceding vertex
    MultiStageMRConfToTezTranslator.translateVertexConfToTez(conf, null);

    // finally create the vertex
    Vertex map = null;

    // use tez to combine splits
    boolean useTezGroupedSplits = false;

    int numTasks = -1;
    Class amSplitGeneratorClass = null;
    InputSplitInfo inputSplitInfo = null;
    Class inputFormatClass = conf.getClass("mapred.input.format.class", InputFormat.class);

    boolean vertexHasCustomInput = false;
    if (tezWork != null) {
      for (BaseWork baseWork : tezWork.getParents(mapWork)) {
        if (tezWork.getEdgeType(baseWork, mapWork) == EdgeType.CUSTOM_EDGE) {
          vertexHasCustomInput = true;
        }
      }
    }
    if (vertexHasCustomInput) {
      useTezGroupedSplits = false;
      // grouping happens in the execution phase. Setting the class to TezGroupedSplitsInputFormat
      // here would cause premature grouping, which would be incorrect.
      inputFormatClass = HiveInputFormat.class;
      conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class);
      // mapreduce.tez.input.initializer.serialize.event.payload should be set to false when using
      // this plug-in to avoid getting a serialized event at run-time.
      conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false);
    } else {
      // we'll set up tez to combine splits for us iff the input format
      // is HiveInputFormat
      if (inputFormatClass == HiveInputFormat.class) {
        useTezGroupedSplits = true;
        conf.setClass(
            "mapred.input.format.class", TezGroupedSplitsInputFormat.class, InputFormat.class);
      }
    }

    if (HiveConf.getBoolVar(conf, ConfVars.HIVE_AM_SPLIT_GENERATION)) {
      // if we're generating the splits in the AM, we just need to set
      // the correct plugin.
      amSplitGeneratorClass = MRInputAMSplitGenerator.class;
    } else {
      // client side split generation means we have to compute them now
      inputSplitInfo =
          MRHelpers.generateInputSplits(
              conf, new Path(tezDir, "split_" + mapWork.getName().replaceAll(" ", "_")));
      numTasks = inputSplitInfo.getNumTasks();
    }

    byte[] serializedConf = MRHelpers.createUserPayloadFromConf(conf);
    map =
        new Vertex(
            mapWork.getName(),
            new ProcessorDescriptor(MapTezProcessor.class.getName()).setUserPayload(serializedConf),
            numTasks,
            getContainerResource(conf));
    Map<String, String> environment = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(conf, environment, true);
    map.setTaskEnvironment(environment);
    map.setJavaOpts(getContainerJavaOpts(conf));

    assert mapWork.getAliasToWork().keySet().size() == 1;

    String alias = mapWork.getAliasToWork().keySet().iterator().next();

    byte[] mrInput = null;
    if (useTezGroupedSplits) {
      mrInput =
          MRHelpers.createMRInputPayloadWithGrouping(
              serializedConf, HiveInputFormat.class.getName());
    } else {
      mrInput = MRHelpers.createMRInputPayload(serializedConf, null);
    }
    map.addInput(
        alias,
        new InputDescriptor(MRInputLegacy.class.getName()).setUserPayload(mrInput),
        amSplitGeneratorClass);

    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put(getBaseName(appJarLr), appJarLr);
    for (LocalResource lr : additionalLr) {
      localResources.put(getBaseName(lr), lr);
    }

    if (inputSplitInfo != null) {
      // only relevant for client-side split generation
      map.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
      MRHelpers.updateLocalResourcesForInputSplits(
          FileSystem.get(conf), inputSplitInfo, localResources);
    }

    map.setTaskLocalResources(localResources);
    return map;
  }
Example #26
  /**
   * @param fsInput The FileSink operator.
   * @param ctx The MR processing context.
   * @param finalName the final destination path the merge job should output.
   * @throws SemanticException
   *     <p>Create a Map-only merge job using CombineHiveInputFormat for all partitions, with the
   *     following operators:
   *
   *     <pre>
   *       MR job J0:
   *       ...
   *         |
   *         v
   *       FileSinkOperator_1 (fsInput)
   *         |
   *         v
   *       Merge job J1:
   *         |
   *         v
   *       TableScan (using CombineHiveInputFormat) (tsMerge)
   *         |
   *         v
   *       FileSinkOperator (fsMerge)
   *     </pre>
   *     <p>Here the pathToPartitionInfo & pathToAlias will remain the same, which means the paths
   *     do not contain the dynamic partitions (their parent). So after the dynamic partitions are
   *     created (after the first job finishes, before the moveTask or ConditionalTask starts), we
   *     need to change the pathToPartitionInfo & pathToAlias to include the dynamic partition
   *     directories.
   */
  private void createMRWorkForMergingFiles(
      FileSinkOperator fsInput, GenMRProcContext ctx, String finalName) throws SemanticException {

    //
    // 1. create the operator tree
    //
    HiveConf conf = ctx.getParseCtx().getConf();
    FileSinkDesc fsInputDesc = fsInput.getConf();

    // Create a TableScan operator
    RowSchema inputRS = fsInput.getSchema();
    Operator<? extends OperatorDesc> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);

    // Create a FileSink operator
    TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
    FileSinkDesc fsOutputDesc =
        new FileSinkDesc(finalName, ts, conf.getBoolVar(ConfVars.COMPRESSRESULT));
    FileSinkOperator fsOutput =
        (FileSinkOperator) OperatorFactory.getAndMakeChild(fsOutputDesc, inputRS, tsMerge);

    // If the input FileSinkOperator has dynamic partitioning enabled, the tsMerge input schema
    // needs to include the partition column, and the fsOutput should have
    // a DynamicPartitionCtx to indicate that it needs to be dynamically partitioned.
    DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
    if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
      // adding DP ColumnInfo to the RowSchema signature
      ArrayList<ColumnInfo> signature = inputRS.getSignature();
      String tblAlias = fsInputDesc.getTableInfo().getTableName();
      LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>();
      StringBuilder partCols = new StringBuilder();
      for (String dpCol : dpCtx.getDPColNames()) {
        ColumnInfo colInfo =
            new ColumnInfo(
                dpCol,
                TypeInfoFactory.stringTypeInfo, // all partition column type should be string
                tblAlias,
                true); // partition column is virtual column
        signature.add(colInfo);
        colMap.put(dpCol, dpCol); // input and output have the same column name
        partCols.append(dpCol).append('/');
      }
      partCols.setLength(partCols.length() - 1); // remove the last '/'
      inputRS.setSignature(signature);

      // create another DynamicPartitionCtx, which has a different input-to-DP column mapping
      DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
      dpCtx2.setInputToDPCols(colMap);
      fsOutputDesc.setDynPartCtx(dpCtx2);

      // update the FileSinkOperator to include partition columns
      fsInputDesc
          .getTableInfo()
          .getProperties()
          .setProperty(
              org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS,
              partCols.toString()); // list of dynamic partition column names
    } else {
      // non-partitioned table
      fsInputDesc
          .getTableInfo()
          .getProperties()
          .remove(
              org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
    }

    //
    // 2. Constructing a conditional task consisting of a move task and a map reduce task
    //
    MoveWork dummyMv =
        new MoveWork(
            null,
            null,
            null,
            new LoadFileDesc(fsInputDesc.getFinalDirName(), finalName, true, null, null),
            false);
    MapredWork cplan;

    if (conf.getBoolVar(ConfVars.HIVEMERGERCFILEBLOCKLEVEL)
        && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) {

      // Check if InputFormatClass is valid
      String inputFormatClass = conf.getVar(ConfVars.HIVEMERGERCFILEINPUTFORMATBLOCKLEVEL);
      try {
        Class c = (Class<? extends InputFormat>) Class.forName(inputFormatClass);

        LOG.info("RCFile format- Using block level merge");
        cplan =
            createBlockMergeTask(
                fsInputDesc,
                finalName,
                dpCtx != null && dpCtx.getNumDPCols() > 0,
                RCFileMergeMapper.class,
                RCFileInputFormat.class,
                RCFileBlockMergeInputFormat.class);
      } catch (ClassNotFoundException e) {
        String msg = "Illegal input format class: " + inputFormatClass;
        throw new SemanticException(msg);
      }

    } else if (conf.getBoolVar(ConfVars.HIVEMERGEORCBLOCKLEVEL)
        && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(OrcInputFormat.class)) {

      // Check if InputFormatClass is valid
      String inputFormatClass = conf.getVar(ConfVars.HIVEMERGEORCINPUTFORMATBLOCKLEVEL);
      try {
        Class c = (Class<? extends InputFormat>) Class.forName(inputFormatClass);

        LOG.info("ORCFile format- Using block level merge");
        cplan =
            createBlockMergeTask(
                fsInputDesc,
                finalName,
                dpCtx != null && dpCtx.getNumDPCols() > 0,
                OrcMergeMapper.class,
                OrcInputFormat.class,
                OrcBlockMergeInputFormat.class);
      } catch (ClassNotFoundException e) {
        String msg = "Illegal input format class: " + inputFormatClass;
        throw new SemanticException(msg);
      }

    } else {
      cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc);
      // use CombineHiveInputFormat for map-only merging
    }
    cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat");
    // NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't
    // know if merge MR2 will be triggered at execution time
    ConditionalTask cndTsk =
        createCondTask(conf, ctx.getCurrTask(), dummyMv, cplan, fsInputDesc.getFinalDirName());

    // keep the dynamic partition context in conditional task resolver context
    ConditionalResolverMergeFilesCtx mrCtx =
        (ConditionalResolverMergeFilesCtx) cndTsk.getResolverCtx();
    mrCtx.setDPCtx(fsInputDesc.getDynPartCtx());
    mrCtx.setLbCtx(fsInputDesc.getLbCtx());

    //
    // 3. add the moveTask as the children of the conditional task
    //
    linkMoveTask(ctx, fsOutput, cndTsk);
  }
Example #27
  private String showJobFailDebugInfo() throws IOException {
    console.printError("Error during job, obtaining debugging information...");
    if (!conf.get("mapred.job.tracker", "local").equals("local")) {
      // Show Tracking URL for remotely running jobs.
      console.printError("Job Tracking URL: " + rj.getTrackingURL());
    }
    // Loop to get all task completion events because getTaskCompletionEvents
    // only returns a subset per call
    TaskInfoGrabber tlg = new TaskInfoGrabber();
    Thread t = new Thread(tlg);
    try {
      t.start();
      t.join(HiveConf.getIntVar(conf, HiveConf.ConfVars.TASKLOG_DEBUG_TIMEOUT));
    } catch (InterruptedException e) {
      console.printError(
          "Timed out trying to finish grabbing task log URLs, " + "some task info may be missing");
    }

    // Remove failures for tasks that succeeded
    for (String task : successes) {
      failures.remove(task);
    }

    if (failures.keySet().size() == 0) {
      return null;
    }
    // Find the highest failure count
    computeMaxFailures();

    // Display Error Message for tasks with the highest failure count
    String jtUrl = null;
    try {
      jtUrl = JobTrackerURLResolver.getURL(conf);
    } catch (Exception e) {
      console.printError("Unable to retrieve URL for Hadoop Task logs. " + e.getMessage());
    }

    String msg = null;
    for (String task : failures.keySet()) {
      if (failures.get(task).intValue() == maxFailures) {
        TaskInfo ti = taskIdToInfo.get(task);
        String jobId = ti.getJobId();
        String taskUrl =
            (jtUrl == null)
                ? null
                : jtUrl + "/taskdetails.jsp?jobid=" + jobId + "&tipid=" + task.toString();

        TaskLogProcessor tlp = new TaskLogProcessor(conf);
        for (String logUrl : ti.getLogUrls()) {
          tlp.addTaskAttemptLogUrl(logUrl);
        }

        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.JOB_DEBUG_CAPTURE_STACKTRACES)
            && stackTraces != null) {
          if (!stackTraces.containsKey(jobId)) {
            stackTraces.put(jobId, new ArrayList<List<String>>());
          }
          stackTraces.get(jobId).addAll(tlp.getStackTraces());
        }

        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.SHOW_JOB_FAIL_DEBUG_INFO)) {
          List<ErrorAndSolution> errors = tlp.getErrors();

          StringBuilder sb = new StringBuilder();
          // We use a StringBuilder and then call printError only once as
          // printError will write to both stderr and the error log file. In
          // situations where both the stderr and the log file output is
          // simultaneously output to a single stream, this will look cleaner.
          sb.append("\n");
          sb.append("Task with the most failures(" + maxFailures + "): \n");
          sb.append("-----\n");
          sb.append("Task ID:\n  " + task + "\n\n");
          if (taskUrl != null) {
            sb.append("URL:\n  " + taskUrl + "\n");
          }

          for (ErrorAndSolution e : errors) {
            sb.append("\n");
            sb.append("Possible error:\n  " + e.getError() + "\n\n");
            sb.append("Solution:\n  " + e.getSolution() + "\n");
          }
          sb.append("-----\n");

          sb.append("Diagnostic Messages for this Task:\n");
          String[] diagMesgs = ti.getDiagnosticMesgs();
          for (String mesg : diagMesgs) {
            sb.append(mesg + "\n");
          }
          msg = sb.toString();
          console.printError(msg);
        }

        // Only print out one task because that's good enough for debugging.
        break;
      }
    }
    return msg;
  }
  public Result invokeInternal(final Object proxy, final Method method, final Object[] args)
      throws Throwable {

    boolean gotNewConnectUrl = false;
    boolean reloadConf = HiveConf.getBoolVar(origConf, HiveConf.ConfVars.HMSHANDLERFORCERELOADCONF);
    long retryInterval =
        HiveConf.getTimeVar(origConf, HiveConf.ConfVars.HMSHANDLERINTERVAL, TimeUnit.MILLISECONDS);
    int retryLimit = HiveConf.getIntVar(origConf, HiveConf.ConfVars.HMSHANDLERATTEMPTS);
    long timeout =
        HiveConf.getTimeVar(
            origConf, HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT, TimeUnit.MILLISECONDS);

    Deadline.registerIfNot(timeout);

    if (reloadConf) {
      MetaStoreInit.updateConnectionURL(origConf, getActiveConf(), null, metaStoreInitData);
    }

    int retryCount = 0;
    Throwable caughtException = null;
    while (true) {
      try {
        if (reloadConf || gotNewConnectUrl) {
          baseHandler.setConf(getActiveConf());
        }
        Object object = null;
        boolean isStarted = Deadline.startTimer(method.getName());
        try {
          object = method.invoke(baseHandler, args);
        } finally {
          if (isStarted) {
            Deadline.stopTimer();
          }
        }
        return new Result(object, retryCount);

      } catch (javax.jdo.JDOException e) {
        caughtException = e;
      } catch (UndeclaredThrowableException e) {
        if (e.getCause() != null) {
          if (e.getCause() instanceof javax.jdo.JDOException) {
            // Due to reflection, the jdo exception is wrapped in
            // invocationTargetException
            caughtException = e.getCause();
          } else if (e.getCause() instanceof MetaException
              && e.getCause().getCause() != null
              && e.getCause().getCause() instanceof javax.jdo.JDOException) {
            // The JDOException may be wrapped further in a MetaException
            caughtException = e.getCause().getCause();
          } else {
            LOG.error(ExceptionUtils.getStackTrace(e.getCause()));
            throw e.getCause();
          }
        } else {
          LOG.error(ExceptionUtils.getStackTrace(e));
          throw e;
        }
      } catch (InvocationTargetException e) {
        if (e.getCause() instanceof javax.jdo.JDOException) {
          // Due to reflection, the jdo exception is wrapped in
          // invocationTargetException
          caughtException = e.getCause();
        } else if (e.getCause() instanceof NoSuchObjectException
            || e.getTargetException().getCause() instanceof NoSuchObjectException) {
          String methodName = method.getName();
          if (!methodName.startsWith("get_database")
              && !methodName.startsWith("get_table")
              && !methodName.startsWith("get_partition")
              && !methodName.startsWith("get_function")) {
            LOG.error(ExceptionUtils.getStackTrace(e.getCause()));
          }
          throw e.getCause();
        } else if (e.getCause() instanceof MetaException && e.getCause().getCause() != null) {
          if (e.getCause().getCause() instanceof javax.jdo.JDOException
              || e.getCause().getCause() instanceof NucleusException) {
            // The JDOException or the Nucleus Exception may be wrapped further in a MetaException
            caughtException = e.getCause().getCause();
          } else if (e.getCause().getCause() instanceof DeadlineException) {
            // The Deadline Exception needs no retry and should be thrown immediately.
            Deadline.clear();
            LOG.error(
                "Error happens in method "
                    + method.getName()
                    + ": "
                    + ExceptionUtils.getStackTrace(e.getCause()));
            throw e.getCause();
          } else {
            LOG.error(ExceptionUtils.getStackTrace(e.getCause()));
            throw e.getCause();
          }
        } else {
          LOG.error(ExceptionUtils.getStackTrace(e.getCause()));
          throw e.getCause();
        }
      }

      if (retryCount >= retryLimit) {
        LOG.error("HMSHandler Fatal error: " + ExceptionUtils.getStackTrace(caughtException));
        // Since returning exceptions with a nested "cause" can be a problem in
        // Thrift, we are stuffing the stack trace into the message itself.
        throw new MetaException(ExceptionUtils.getStackTrace(caughtException));
      }

      assert (retryInterval >= 0);
      retryCount++;
      LOG.error(
          String.format(
                  "Retrying HMSHandler after %d ms (attempt %d of %d)",
                  retryInterval, retryCount, retryLimit)
              + " with error: "
              + ExceptionUtils.getStackTrace(caughtException));

      Thread.sleep(retryInterval);
      // If we have a connection error, the JDO connection URL hook might
      // provide us with a new URL to access the datastore.
      String lastUrl = MetaStoreInit.getConnectionURL(getActiveConf());
      gotNewConnectUrl =
          MetaStoreInit.updateConnectionURL(origConf, getActiveConf(), lastUrl, metaStoreInitData);
    }
  }
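Stripped of the metastore-specific exception unwrapping, the retry structure above is a simple bounded loop: invoke, capture the failure, rethrow once the limit is reached, otherwise sleep and retry. Below is a generic sketch of that skeleton; the names are made up, and a real implementation would retry only on retriable exceptions.

import java.util.concurrent.Callable;

public class RetrySketch {
  /** Retry a call up to retryLimit times, sleeping retryIntervalMs between attempts. */
  static <T> T callWithRetry(Callable<T> call, int retryLimit, long retryIntervalMs)
      throws Exception {
    int retryCount = 0;
    Exception caught = null;
    while (true) {
      try {
        return call.call();
      } catch (Exception e) {
        caught = e;  // a real implementation would only retry on retriable exceptions
      }
      if (retryCount >= retryLimit) {
        throw caught;
      }
      retryCount++;
      Thread.sleep(retryIntervalMs);
    }
  }

  public static void main(String[] args) throws Exception {
    String result = callWithRetry(() -> "ok", 3, 100L);
    System.out.println(result);
  }
}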
Example #29
  @SuppressWarnings("unchecked")
  public static void main(String[] args) throws IOException, HiveException {

    String planFileName = null;
    String jobConfFileName = null;
    boolean noLog = false;
    String files = null;
    boolean localtask = false;
    try {
      for (int i = 0; i < args.length; i++) {
        if (args[i].equals("-plan")) {
          planFileName = args[++i];
        } else if (args[i].equals("-jobconffile")) {
          jobConfFileName = args[++i];
        } else if (args[i].equals("-nolog")) {
          noLog = true;
        } else if (args[i].equals("-files")) {
          files = args[++i];
        } else if (args[i].equals("-localtask")) {
          localtask = true;
        }
      }
    } catch (IndexOutOfBoundsException e) {
      System.err.println("Missing argument to option");
      printUsage();
    }

    JobConf conf;
    if (localtask) {
      conf = new JobConf(MapredLocalTask.class);
    } else {
      conf = new JobConf(ExecDriver.class);
    }

    if (jobConfFileName != null) {
      conf.addResource(new Path(jobConfFileName));
    }

    if (files != null) {
      conf.set("tmpfiles", files);
    }

    if (UserGroupInformation.isSecurityEnabled()) {
      String hadoopAuthToken = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
      if (hadoopAuthToken != null) {
        conf.set("mapreduce.job.credentials.binary", hadoopAuthToken);
      }
    }

    boolean isSilent = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESESSIONSILENT);

    String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID, "").trim();
    if (queryId.isEmpty()) {
      queryId = "unknown-" + System.currentTimeMillis();
    }
    System.setProperty(HiveConf.ConfVars.HIVEQUERYID.toString(), queryId);

    if (noLog) {
      // If started from main(), and noLog is on, we should not output
      // any logs. To turn the log on, please set -Dtest.silent=false
      org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getRootLogger();
      NullAppender appender = NullAppender.createNullAppender();
      appender.addToLogger(logger.getName(), Level.ERROR);
      appender.start();
    } else {
      setupChildLog4j(conf);
    }

    Logger LOG = LoggerFactory.getLogger(ExecDriver.class.getName());
    LogHelper console = new LogHelper(LOG, isSilent);

    if (planFileName == null) {
      console.printError("Must specify Plan File Name");
      printUsage();
    }

    // Print out the location of the log file for the user so that it is easy
    // to find the reason for local-mode execution failures
    for (Appender appender :
        ((org.apache.logging.log4j.core.Logger) LogManager.getRootLogger())
            .getAppenders()
            .values()) {
      if (appender instanceof FileAppender) {
        console.printInfo("Execution log at: " + ((FileAppender) appender).getFileName());
      } else if (appender instanceof RollingFileAppender) {
        console.printInfo("Execution log at: " + ((RollingFileAppender) appender).getFileName());
      }
    }

    // the plan file should always be on the local filesystem
    Path p = new Path(planFileName);
    FileSystem fs = FileSystem.getLocal(conf);
    InputStream pathData = fs.open(p);

    // This is a workaround for hadoop-17: libjars are not added to the classpath
    // of the child process, so we add them here explicitly

    String auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS);
    String addedJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEADDEDJARS);
    try {
      // see also - code in CliDriver.java
      ClassLoader loader = conf.getClassLoader();
      if (StringUtils.isNotBlank(auxJars)) {
        loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","));
      }
      if (StringUtils.isNotBlank(addedJars)) {
        loader = Utilities.addToClassPath(loader, StringUtils.split(addedJars, ","));
      }
      conf.setClassLoader(loader);
      // Also set this as the thread context class loader, so new threads will
      // inherit it and propagate it into Configurations created by those threads.
      Thread.currentThread().setContextClassLoader(loader);
    } catch (Exception e) {
      throw new HiveException(e.getMessage(), e);
    }
    int ret;
    if (localtask) {
      memoryMXBean = ManagementFactory.getMemoryMXBean();
      MapredLocalWork plan = Utilities.deserializePlan(pathData, MapredLocalWork.class, conf);
      MapredLocalTask ed = new MapredLocalTask(plan, conf, isSilent);
      ret = ed.executeInProcess(new DriverContext());

    } else {
      MapredWork plan = Utilities.deserializePlan(pathData, MapredWork.class, conf);
      ExecDriver ed = new ExecDriver(plan, conf, isSilent);
      ret = ed.execute(new DriverContext());
    }

    if (ret != 0) {
      System.exit(ret);
    }
  }
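A note on the class-loader handoff above: main() extends the current loader with any auxiliary and session-added jars, then installs the result on both the JobConf and the thread context so that later threads and Configurations resolve user classes through it. Below is a hedged, self-contained sketch of just that step; the class name is illustrative and the jar URLs are assumed to be supplied by the caller.

import java.net.URL;
import java.net.URLClassLoader;
import org.apache.hadoop.conf.Configuration;

public final class ClassLoaderSetup {
  /** Extends the configuration's class loader with extra jars and installs it on the current thread. */
  public static void installExtraJars(Configuration conf, URL[] extraJarUrls) {
    ClassLoader parent = conf.getClassLoader();
    URLClassLoader extended = new URLClassLoader(extraJarUrls, parent);
    conf.setClassLoader(extended);
    // New threads inherit the context class loader, and Configurations created by
    // those threads will resolve user classes through it as well.
    Thread.currentThread().setContextClassLoader(extended);
  }
}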
Exemple #30
0
  /** Execute a query plan using Hadoop. */
  @SuppressWarnings({"deprecation", "unchecked"})
  @Override
  public int execute(DriverContext driverContext) {

    IOPrepareCache ioPrepareCache = IOPrepareCache.get();
    ioPrepareCache.clear();

    boolean success = true;

    Context ctx = driverContext.getCtx();
    boolean ctxCreated = false;
    Path emptyScratchDir;

    MapWork mWork = work.getMapWork();
    ReduceWork rWork = work.getReduceWork();

    try {
      if (ctx == null) {
        ctx = new Context(job);
        ctxCreated = true;
      }

      emptyScratchDir = ctx.getMRTmpPath();
      FileSystem fs = emptyScratchDir.getFileSystem(job);
      fs.mkdirs(emptyScratchDir);
    } catch (IOException e) {
      e.printStackTrace();
      console.printError(
          "Error launching map-reduce job",
          "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return 5;
    }

    HiveFileFormatUtils.prepareJobOutput(job);
    // See the javadoc on HiveOutputFormatImpl and HadoopShims.prepareJobOutput()
    job.setOutputFormat(HiveOutputFormatImpl.class);

    job.setMapperClass(ExecMapper.class);

    job.setMapOutputKeyClass(HiveKey.class);
    job.setMapOutputValueClass(BytesWritable.class);

    try {
      String partitioner = HiveConf.getVar(job, ConfVars.HIVEPARTITIONER);
      job.setPartitionerClass(JavaUtils.loadClass(partitioner));
    } catch (ClassNotFoundException e) {
      throw new RuntimeException(e.getMessage(), e);
    }

    if (mWork.getNumMapTasks() != null) {
      job.setNumMapTasks(mWork.getNumMapTasks().intValue());
    }

    if (mWork.getMaxSplitSize() != null) {
      HiveConf.setLongVar(
          job, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mWork.getMaxSplitSize().longValue());
    }

    if (mWork.getMinSplitSize() != null) {
      HiveConf.setLongVar(
          job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mWork.getMinSplitSize().longValue());
    }

    if (mWork.getMinSplitSizePerNode() != null) {
      HiveConf.setLongVar(
          job,
          HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE,
          mWork.getMinSplitSizePerNode().longValue());
    }

    if (mWork.getMinSplitSizePerRack() != null) {
      HiveConf.setLongVar(
          job,
          HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK,
          mWork.getMinSplitSizePerRack().longValue());
    }

    job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0);
    job.setReducerClass(ExecReducer.class);

    // set input format information if necessary
    setInputAttributes(job);

    // Propagate the Hive setting for reducer speculative execution to Hadoop
    boolean useSpeculativeExecReducers =
        HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
    HiveConf.setBoolVar(
        job, HiveConf.ConfVars.HADOOPSPECULATIVEEXECREDUCERS, useSpeculativeExecReducers);

    String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);

    if (mWork.isUseBucketizedHiveInputFormat()) {
      inpFormat = BucketizedHiveInputFormat.class.getName();
    }

    LOG.info("Using " + inpFormat);

    try {
      job.setInputFormat(JavaUtils.loadClass(inpFormat));
    } catch (ClassNotFoundException e) {
      throw new RuntimeException(e.getMessage(), e);
    }

    // No-op: we don't really write anything here.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Transfer HIVEAUXJARS and HIVEADDEDJARS to "tmpjars" so hadoop understands
    // it
    String auxJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEAUXJARS);
    String addedJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDJARS);
    if (StringUtils.isNotBlank(auxJars) || StringUtils.isNotBlank(addedJars)) {
      String allJars =
          StringUtils.isNotBlank(auxJars)
              ? (StringUtils.isNotBlank(addedJars) ? addedJars + "," + auxJars : auxJars)
              : addedJars;
      LOG.info("adding libjars: " + allJars);
      initializeFiles("tmpjars", allJars);
    }

    // Transfer HIVEADDEDFILES to "tmpfiles" so hadoop understands it
    String addedFiles = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDFILES);
    if (StringUtils.isNotBlank(addedFiles)) {
      initializeFiles("tmpfiles", addedFiles);
    }
    int returnVal = 0;
    boolean noName = StringUtils.isEmpty(HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJOBNAME));

    if (noName) {
      // This is for a special case to ensure unit tests pass
      HiveConf.setVar(job, HiveConf.ConfVars.HADOOPJOBNAME, "JOB" + Utilities.randGen.nextInt());
    }
    String addedArchives = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDARCHIVES);
    // Transfer HIVEADDEDARCHIVES to "tmparchives" so hadoop understands it
    if (StringUtils.isNotBlank(addedArchives)) {
      initializeFiles("tmparchives", addedArchives);
    }

    try {
      MapredLocalWork localwork = mWork.getMapRedLocalWork();
      if (localwork != null && localwork.hasStagedAlias()) {
        if (!ShimLoader.getHadoopShims().isLocalMode(job)) {
          Path localPath = localwork.getTmpPath();
          Path hdfsPath = mWork.getTmpHDFSPath();

          FileSystem hdfs = hdfsPath.getFileSystem(job);
          FileSystem localFS = localPath.getFileSystem(job);
          FileStatus[] hashtableFiles = localFS.listStatus(localPath);
          int fileNumber = hashtableFiles.length;
          String[] fileNames = new String[fileNumber];

          for (int i = 0; i < fileNumber; i++) {
            fileNames[i] = hashtableFiles[i].getPath().getName();
          }

          // package and compress all the hashtable files to an archive file
          String stageId = this.getId();
          String archiveFileName = Utilities.generateTarFileName(stageId);
          localwork.setStageID(stageId);

          CompressionUtils.tar(localPath.toUri().getPath(), fileNames, archiveFileName);
          Path archivePath = Utilities.generateTarPath(localPath, stageId);
          LOG.info("Archive " + hashtableFiles.length + " hash table files to " + archivePath);

          // upload archive file to hdfs
          Path hdfsFilePath = Utilities.generateTarPath(hdfsPath, stageId);
          short replication = (short) job.getInt("mapred.submit.replication", 10);
          hdfs.copyFromLocalFile(archivePath, hdfsFilePath);
          hdfs.setReplication(hdfsFilePath, replication);
          LOG.info("Upload 1 archive file  from" + archivePath + " to: " + hdfsFilePath);

          // add the archive file to distributed cache
          DistributedCache.createSymlink(job);
          DistributedCache.addCacheArchive(hdfsFilePath.toUri(), job);
          LOG.info(
              "Add 1 archive file to distributed cache. Archive file: " + hdfsFilePath.toUri());
        }
      }
      work.configureJobConf(job);
      List<Path> inputPaths = Utilities.getInputPaths(job, mWork, emptyScratchDir, ctx, false);
      Utilities.setInputPaths(job, inputPaths);

      Utilities.setMapRedWork(job, work, ctx.getMRTmpPath());

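      // If the plan requests sampling and more than one reducer is configured, sample
      // the input to build boundaries for total-order partitioning; on failure fall
      // back to a single reducer.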
      if (mWork.getSamplingType() > 0 && rWork != null && job.getNumReduceTasks() > 1) {
        try {
          handleSampling(ctx, mWork, job);
          job.setPartitionerClass(HiveTotalOrderPartitioner.class);
        } catch (IllegalStateException e) {
          console.printInfo("Not enough sampling data.. Rolling back to single reducer task");
          rWork.setNumReduceTasks(1);
          job.setNumReduceTasks(1);
        } catch (Exception e) {
          LOG.error("Sampling error", e);
          console.printError(
              e.toString(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
          rWork.setNumReduceTasks(1);
          job.setNumReduceTasks(1);
        }
      }

      // Remove the password from the conf so that the job tracker does not show
      // it in its logs
      String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
      if (pwd != null) {
        HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
      }
      JobClient jc = new JobClient(job);
      // make this client wait if job tracker is not behaving well.
      Throttle.checkJobTracker(job, LOG);

      if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) {
        // initialize stats publishing table
        StatsPublisher statsPublisher;
        StatsFactory factory = StatsFactory.newFactory(job);
        if (factory != null) {
          statsPublisher = factory.getStatsPublisher();
          List<String> statsTmpDir = Utilities.getStatsTmpDirs(mWork, job);
          if (rWork != null) {
            statsTmpDir.addAll(Utilities.getStatsTmpDirs(rWork, job));
          }
          StatsCollectionContext sc = new StatsCollectionContext(job);
          sc.setStatsTmpDirs(statsTmpDir);
          if (!statsPublisher.init(sc)) { // create the stats table if it does not exist
            if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
              throw new HiveException(
                  ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
            }
          }
        }
      }

      Utilities.createTmpDirs(job, mWork);
      Utilities.createTmpDirs(job, rWork);

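      // The configured execution engine is Tez but this task runs as MapReduce, so
      // close the session's Tez resources before submitting the MR job.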
      SessionState ss = SessionState.get();
      if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
          && ss != null) {
        TezSessionState session = ss.getTezSession();
        TezSessionPoolManager.getInstance().close(session, true);
      }

      // Finally SUBMIT the JOB!
      rj = jc.submitJob(job);
      // restore the original password in the conf
      if (pwd != null) {
        HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, pwd);
      }

      returnVal = jobExecHelper.progress(rj, jc, ctx.getHiveTxnManager());
      success = (returnVal == 0);
    } catch (Exception e) {
      e.printStackTrace();
      String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
      if (rj != null) {
        mesg = "Ended Job = " + rj.getJobID() + mesg;
      } else {
        mesg = "Job Submission failed" + mesg;
      }

      // Has to use full name to make sure it does not conflict with
      // org.apache.commons.lang.StringUtils
      console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));

      success = false;
      returnVal = 1;
    } finally {
      Utilities.clearWork(job);
      try {
        if (ctxCreated) {
          ctx.clear();
        }

        if (rj != null) {
          if (returnVal != 0) {
            rj.killJob();
          }
          jobID = rj.getID().toString();
        }
      } catch (Exception e) {
        LOG.warn("Failed while cleaning up ", e);
      } finally {
        HadoopJobExecHelper.runningJobs.remove(rj);
      }
    }

    // get the list of dynamic partition paths
    try {
      if (rj != null) {
        if (mWork.getAliasToWork() != null) {
          for (Operator<? extends OperatorDesc> op : mWork.getAliasToWork().values()) {
            op.jobClose(job, success);
          }
        }
        if (rWork != null) {
          rWork.getReducer().jobClose(job, success);
        }
      }
    } catch (Exception e) {
      // jobClose needs to execute successfully; otherwise, fail the task
      if (success) {
        success = false;
        returnVal = 3;
        String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'";
        console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
      }
    }

    return (returnVal);
  }
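Stripped of the Hive-specific plumbing (aux jars, distributed-cache archives, sampling, stats, password scrubbing), execute() reduces to the classic mapred configure/submit/monitor flow. Below is a hedged skeleton of that flow using only the old org.apache.hadoop.mapred API; the class name is illustrative, and a real caller would have to set the mapper, reducer, and input/output formats on the JobConf first.

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public final class SubmitSketch {
  /** Submits an already-configured JobConf and returns 0 on success, 1 otherwise. */
  public static int run(JobConf job) throws Exception {
    JobClient jc = new JobClient(job);
    RunningJob rj = jc.submitJob(job); // asynchronous submission
    try {
      rj.waitForCompletion();          // the real code polls and reports progress instead
      return rj.isSuccessful() ? 0 : 1;
    } finally {
      if (!rj.isComplete()) {
        rj.killJob();                  // mirrors the cleanup in the finally block above
      }
    }
  }
}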