Example #1
  /** Initialization when invoked from QL. */
  @Override
  public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) {
    super.initialize(conf, queryPlan, driverContext);

    job = new JobConf(conf, ExecDriver.class);

    // NOTE: initialize is only called if it is in non-local mode.
    // In case it's in non-local mode, we need to move the SessionState files
    // and jars to jobConf.
    // In case it's in local mode, MapRedTask will set the jobConf.
    //
    // "tmpfiles" and "tmpjars" are set by the method ExecDriver.execute(),
    // which will be called by both local and NON-local mode.
    String addedFiles = Utilities.getResourceFiles(job, SessionState.ResourceType.FILE);
    if (StringUtils.isNotBlank(addedFiles)) {
      HiveConf.setVar(job, ConfVars.HIVEADDEDFILES, addedFiles);
    }
    String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
    if (StringUtils.isNotBlank(addedJars)) {
      HiveConf.setVar(job, ConfVars.HIVEADDEDJARS, addedJars);
    }
    String addedArchives = Utilities.getResourceFiles(job, SessionState.ResourceType.ARCHIVE);
    if (StringUtils.isNotBlank(addedArchives)) {
      HiveConf.setVar(job, ConfVars.HIVEADDEDARCHIVES, addedArchives);
    }
    conf.stripHiddenConfigurations(job);
    this.jobExecHelper = new HadoopJobExecHelper(job, console, this, this);
  }
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      GroupByOperator op = (GroupByOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      List<String> colLists = new ArrayList<String>();
      GroupByDesc conf = op.getConf();
      ArrayList<ExprNodeDesc> keys = conf.getKeys();
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      ArrayList<AggregationDesc> aggrs = conf.getAggregators();
      for (AggregationDesc aggr : aggrs) {
        ArrayList<ExprNodeDesc> params = aggr.getParameters();
        for (ExprNodeDesc param : params) {
          colLists = Utilities.mergeUniqElems(colLists, param.getCols());
        }
      }
      int groupingSetPosition = conf.getGroupingSetPosition();
      if (groupingSetPosition >= 0) {
        List<String> cols = cppCtx.genColLists(op);
        String groupingColumn = conf.getOutputColumnNames().get(groupingSetPosition);
        if (!cols.contains(groupingColumn)) {
          conf.getOutputColumnNames().remove(groupingSetPosition);
          if (op.getSchema() != null) {
            op.getSchema().getSignature().remove(groupingSetPosition);
          }
        }
      }

      cppCtx.getPrunedColLists().put(op, colLists);
      return null;
    }
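
Most of the pruning processors in this listing accumulate the referenced columns through Utilities.mergeUniqElems. As a point of reference, here is a minimal self-contained sketch of the behavior these call sites rely on (append each source element to the destination unless it is already present, then return the destination), written in plain java.util terms; it is an illustration, not Hive's implementation:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class MergeUniqElemsSketch {
  // Assumed behavior only: append each element of src to dest if dest does not contain it yet.
  static List<String> mergeUniqElems(List<String> dest, List<String> src) {
    if (src == null) {
      return dest;
    }
    for (String element : src) {
      if (!dest.contains(element)) {
        dest.add(element);
      }
    }
    return dest;
  }

  public static void main(String[] args) {
    List<String> cols = new ArrayList<String>();
    cols = mergeUniqElems(cols, Arrays.asList("key", "value"));
    cols = mergeUniqElems(cols, Arrays.asList("value", "ds"));
    System.out.println(cols); // prints [key, value, ds]
  }
}
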
 /*
  * add any input columns referenced in WindowFn args or expressions.
  */
 private ArrayList<String> prunedColumnsList(
     List<String> prunedCols, WindowTableFunctionDef tDef) {
   // create a copy of prunedCols to build the list of pruned columns for the PTFOperator
   ArrayList<String> mergedColList = new ArrayList<String>(prunedCols);
   if (tDef.getWindowFunctions() != null) {
     for (WindowFunctionDef wDef : tDef.getWindowFunctions()) {
       if (wDef.getArgs() == null) {
         continue;
       }
       for (PTFExpressionDef arg : wDef.getArgs()) {
         ExprNodeDesc exprNode = arg.getExprNode();
         Utilities.mergeUniqElems(mergedColList, exprNode.getCols());
       }
     }
   }
   if (tDef.getPartition() != null) {
     for (PTFExpressionDef col : tDef.getPartition().getExpressions()) {
       ExprNodeDesc exprNode = col.getExprNode();
       Utilities.mergeUniqElems(mergedColList, exprNode.getCols());
     }
   }
   if (tDef.getOrder() != null) {
     for (PTFExpressionDef col : tDef.getOrder().getExpressions()) {
       ExprNodeDesc exprNode = col.getExprNode();
       Utilities.mergeUniqElems(mergedColList, exprNode.getCols());
     }
   }
   return mergedColList;
 }
 /**
  * Method to fetch table data
  *
  * @param table table name
  * @param database database name
  * @return rows of the table, with column values comma-separated
  * @throws Exception if any error occurs
  */
 private List<String> getTableData(String table, String database) throws Exception {
   HiveConf conf = new HiveConf();
   conf.addResource("hive-site.xml");
   ArrayList<String> results = new ArrayList<String>();
   ArrayList<String> temp = new ArrayList<String>();
   Hive hive = Hive.get(conf);
   org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table);
   FetchWork work;
   if (!tbl.getPartCols().isEmpty()) {
     List<Partition> partitions = hive.getPartitions(tbl);
     List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
     List<String> partLocs = new ArrayList<String>();
     for (Partition part : partitions) {
       partLocs.add(part.getLocation());
       partDesc.add(Utilities.getPartitionDesc(part));
     }
     work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl));
     work.setLimit(100);
   } else {
     work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl));
   }
   FetchTask task = new FetchTask();
   task.setWork(work);
   task.initialize(conf, null, null);
   task.fetch(temp);
   for (String str : temp) {
     results.add(str.replace("\t", ","));
   }
   return results;
 }
Example #5
  /**
   * Localizes files, archives and jars the user has instructed us to provide on the cluster as
   * resources for execution.
   *
   * @param hdfsDirPathStr HDFS directory to which the resources should be copied
   * @param conf configuration from which the added files, jars and archives are read
   * @return List<LocalResource> local resources to add to execution
   * @throws IOException when hdfs operation fails
   * @throws LoginException when getDefaultDestDir fails with the same exception
   */
  public List<LocalResource> localizeTempFilesFromConf(String hdfsDirPathStr, Configuration conf)
      throws IOException, LoginException {
    List<LocalResource> tmpResources = new ArrayList<LocalResource>();

    String addedFiles = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
    if (StringUtils.isNotBlank(addedFiles)) {
      HiveConf.setVar(conf, ConfVars.HIVEADDEDFILES, addedFiles);
    }
    String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
    if (StringUtils.isNotBlank(addedJars)) {
      HiveConf.setVar(conf, ConfVars.HIVEADDEDJARS, addedJars);
    }
    String addedArchives = Utilities.getResourceFiles(conf, SessionState.ResourceType.ARCHIVE);
    if (StringUtils.isNotBlank(addedArchives)) {
      HiveConf.setVar(conf, ConfVars.HIVEADDEDARCHIVES, addedArchives);
    }

    String auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS);

    // need to localize the additional jars and files
    // we need the directory on hdfs to which we shall put all these files
    String allFiles = auxJars + "," + addedJars + "," + addedFiles + "," + addedArchives;
    addTempFiles(conf, tmpResources, hdfsDirPathStr, allFiles.split(","));
    return tmpResources;
  }
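
Note that the concatenation above produces empty tokens (",,") whenever one of the four resource strings is blank, so whatever consumes allFiles.split(",") has to tolerate empty entries. A small hypothetical helper that builds the list while skipping blanks could look like the following; joinNonBlank is an invented name, not a Hive API:

import java.util.ArrayList;
import java.util.List;

public class ResourceListSketch {
  // Hypothetical helper: join only the non-null, non-blank fragments with commas.
  static String joinNonBlank(String... fragments) {
    List<String> kept = new ArrayList<String>();
    for (String fragment : fragments) {
      if (fragment != null && !fragment.trim().isEmpty()) {
        kept.add(fragment.trim());
      }
    }
    return String.join(",", kept);
  }

  public static void main(String[] args) {
    // The empty strings stand in for unset auxJars/addedArchives values.
    System.out.println(joinNonBlank("", "added.jar", "script.py", ""));
    // prints: added.jar,script.py
  }
}
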
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      ReduceSinkOperator op = (ReduceSinkOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      RowResolver resolver = cppCtx.getOpToParseCtxMap().get(op).getRowResolver();
      ReduceSinkDesc conf = op.getConf();

      List<String> colLists = new ArrayList<String>();
      ArrayList<ExprNodeDesc> keys = conf.getKeyCols();
      LOG.debug("Reduce Sink Operator " + op.getIdentifier() + " key:" + keys);
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      assert op.getNumChild() == 1;

      Operator<? extends OperatorDesc> child = op.getChildOperators().get(0);

      List<String> childCols;
      if (child instanceof CommonJoinOperator) {
        childCols = cppCtx.getJoinPrunedColLists().get(child).get((byte) conf.getTag());
      } else {
        childCols = cppCtx.getPrunedColList(child);
      }
      List<ExprNodeDesc> valCols = conf.getValueCols();
      List<String> valColNames = conf.getOutputValueColumnNames();

      if (childCols != null) {
        boolean[] flags = new boolean[valCols.size()];

        for (String childCol : childCols) {
          int index = valColNames.indexOf(Utilities.removeValueTag(childCol));
          if (index < 0) {
            continue;
          }
          flags[index] = true;
          colLists = Utilities.mergeUniqElems(colLists, valCols.get(index).getCols());
        }

        Collections.sort(colLists);
        pruneReduceSinkOperator(flags, op, cppCtx);
        cppCtx.getPrunedColLists().put(op, colLists);
        return null;
      }

      // Reduce Sink contains the columns needed - no need to aggregate from
      // children
      for (ExprNodeDesc val : valCols) {
        colLists = Utilities.mergeUniqElems(colLists, val.getCols());
      }

      cppCtx.getPrunedColLists().put(op, colLists);
      return null;
    }
Example #7
 public List<String> getReferencedColumns() throws SemanticException {
   MatchPath matchPath = (MatchPath) evaluator;
   List<String> columns = new ArrayList<String>();
   for (ExprNodeDesc exprNode : matchPath.resultExprInfo.resultExprNodes) {
     Utilities.mergeUniqElems(columns, exprNode.getCols());
   }
   for (ExprNodeDesc exprNode : matchPath.symInfo.symbolExprsDecs) {
     Utilities.mergeUniqElems(columns, exprNode.getCols());
   }
   return columns;
 }
Example #8
    public void preTest(HiveConf conf) throws Exception {

      if (zooKeeperCluster == null) {
        // create temp dir
        String tmpBaseDir = System.getProperty("test.tmp.dir");
        File tmpDir = Utilities.createTempDir(tmpBaseDir);

        zooKeeperCluster = new MiniZooKeeperCluster();
        zkPort = zooKeeperCluster.startup(tmpDir);
      }

      if (zooKeeper != null) {
        zooKeeper.close();
      }

      int sessionTimeout =
          (int)
              conf.getTimeVar(
                  HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT, TimeUnit.MILLISECONDS);
      zooKeeper =
          new ZooKeeper(
              "localhost:" + zkPort,
              sessionTimeout,
              new Watcher() {
                @Override
                public void process(WatchedEvent arg0) {}
              });

      String zkServer = "localhost";
      conf.set("hive.zookeeper.quorum", zkServer);
      conf.set("hive.zookeeper.client.port", "" + zkPort);
    }
Example #9
 // TODO#: assumes throw
 private void localizeJarForClass(FileSystem lfs, Path libDir, String className, boolean doThrow)
     throws IOException {
   String jarPath = null;
   boolean hasException = false;
   try {
     Class<?> auxClass = Class.forName(className);
     jarPath = Utilities.jarFinderGetJar(auxClass);
   } catch (Throwable t) {
     if (doThrow) {
       throw (t instanceof IOException) ? (IOException) t : new IOException(t);
     }
     hasException = true;
     String err =
         "Cannot find a jar for ["
             + className
             + "] due to an exception ("
             + t.getMessage()
             + "); not packaging the jar";
     LOG.error(err, t);
     System.err.println(err);
   }
   if (jarPath != null) {
     lfs.copyFromLocalFile(new Path(jarPath), libDir);
   } else if (!hasException) {
     String err = "Cannot find a jar for [" + className + "]; not packaging the jar";
     if (doThrow) {
       throw new IOException(err);
     }
     LOG.error(err);
     System.err.println(err);
   }
 }
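
Utilities.jarFinderGetJar resolves the jar a class was loaded from. A rough sketch of the usual approach behind such helpers, using only the standard ProtectionDomain/CodeSource API; Hive's real implementation additionally handles classes served from directories and other corner cases:

import java.net.URL;
import java.security.CodeSource;

public class JarForClassSketch {
  // Returns the location the class was loaded from, or null when it is unavailable
  // (classes on the bootstrap loader, for example, report no code source).
  static String jarFor(Class<?> clazz) {
    CodeSource source = clazz.getProtectionDomain().getCodeSource();
    if (source == null || source.getLocation() == null) {
      return null;
    }
    URL location = source.getLocation();
    return location.toString();
  }

  public static void main(String[] args) {
    // For a class packaged in a jar this prints a file: URL ending in .jar;
    // for this sketch it prints the classes directory it was compiled into.
    System.out.println(jarFor(JarForClassSketch.class));
  }
}
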
Example #10
  // reloading the jars under the path specified in hive.reloadable.aux.jars.path property
  public void reloadAuxJars() throws IOException {
    final Set<String> reloadedAuxJars = new HashSet<String>();

    final String renewableJarPath = conf.getVar(ConfVars.HIVERELOADABLEJARS);
    // do nothing if this property is not specified or empty
    if (renewableJarPath == null || renewableJarPath.isEmpty()) {
      return;
    }

    Set<String> jarPaths = Utilities.getJarFilesByPath(renewableJarPath);

    // load jars under the hive.reloadable.aux.jars.path
    if (!jarPaths.isEmpty()) {
      reloadedAuxJars.addAll(jarPaths);
    }

    // remove the previous renewable jars
    try {
      if (preReloadableAuxJars != null && !preReloadableAuxJars.isEmpty()) {
        Utilities.removeFromClassPath(preReloadableAuxJars.toArray(new String[0]));
      }
    } catch (Exception e) {
      String msg = "Fail to remove the reloaded jars loaded last time: " + e;
      throw new IOException(msg, e);
    }

    try {
      if (reloadedAuxJars != null && !reloadedAuxJars.isEmpty()) {
        URLClassLoader currentCLoader =
            (URLClassLoader) SessionState.get().getConf().getClassLoader();
        currentCLoader =
            (URLClassLoader)
                Utilities.addToClassPath(currentCLoader, reloadedAuxJars.toArray(new String[0]));
        conf.setClassLoader(currentCLoader);
        Thread.currentThread().setContextClassLoader(currentCLoader);
      }
      preReloadableAuxJars.clear();
      preReloadableAuxJars.addAll(reloadedAuxJars);
    } catch (Exception e) {
      String msg =
          "Fail to add jars from the path specified in hive.reloadable.aux.jars.path property: "
              + e;
      throw new IOException(msg, e);
    }
  }
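
The heavy lifting above is delegated to Utilities.addToClassPath and removeFromClassPath. A bare-bones sketch of the "add" half, assuming nothing more than java.net.URLClassLoader (the real helper also deduplicates entries and reuses the existing loader where it can):

import java.io.File;
import java.net.URL;
import java.net.URLClassLoader;

public class AddToClassPathSketch {
  // Wrap the current loader in a new URLClassLoader that also sees the given jar paths.
  static ClassLoader addToClassPath(ClassLoader current, String[] jarPaths) throws Exception {
    URL[] urls = new URL[jarPaths.length];
    for (int i = 0; i < jarPaths.length; i++) {
      urls[i] = new File(jarPaths[i]).toURI().toURL();
    }
    return new URLClassLoader(urls, current);
  }

  public static void main(String[] args) throws Exception {
    ClassLoader original = Thread.currentThread().getContextClassLoader();
    // Hypothetical jar path, standing in for hive.reloadable.aux.jars.path contents.
    ClassLoader extended = addToClassPath(original, new String[] {"/tmp/reloadable/udfs.jar"});
    Thread.currentThread().setContextClassLoader(extended);
    // Classes resolved through the context classloader now see the extra jar.
    Thread.currentThread().setContextClassLoader(original); // restore for this sketch
  }
}
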
Example #11
  /*
   * Helper function to create Vertex for given ReduceWork.
   */
  private Vertex createVertex(
      JobConf conf,
      ReduceWork reduceWork,
      LocalResource appJarLr,
      List<LocalResource> additionalLr,
      FileSystem fs,
      Path mrScratchDir,
      Context ctx)
      throws Exception {

    // set up operator plan
    Utilities.setReduceWork(conf, reduceWork, mrScratchDir, false);

    // create the directories FileSinkOperators need
    Utilities.createTmpDirs(conf, reduceWork);

    // Call once here, will be updated when we find edges
    MultiStageMRConfToTezTranslator.translateVertexConfToTez(conf, null);

    // create the vertex
    Vertex reducer =
        new Vertex(
            reduceWork.getName(),
            new ProcessorDescriptor(ReduceTezProcessor.class.getName())
                .setUserPayload(MRHelpers.createUserPayloadFromConf(conf)),
            reduceWork.getNumReduceTasks(),
            getContainerResource(conf));

    Map<String, String> environment = new HashMap<String, String>();

    MRHelpers.updateEnvironmentForMRTasks(conf, environment, false);
    reducer.setTaskEnvironment(environment);

    reducer.setJavaOpts(getContainerJavaOpts(conf));

    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put(getBaseName(appJarLr), appJarLr);
    for (LocalResource lr : additionalLr) {
      localResources.put(getBaseName(lr), lr);
    }
    reducer.setTaskLocalResources(localResources);

    return reducer;
  }
Example #12
  public String cliInit(String tname, boolean recreate) throws Exception {
    if (recreate) {
      cleanUp();
      createSources();
    }

    HiveConf.setVar(
        conf,
        HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER,
        "org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator");
    Utilities.clearWorkMap();
    CliSessionState ss = new CliSessionState(conf);
    assert ss != null;
    ss.in = System.in;

    String outFileExtension = getOutFileExtension(tname);
    String stdoutName = null;
    if (outDir != null) {
      File qf = new File(outDir, tname);
      stdoutName = qf.getName().concat(outFileExtension);
    } else {
      stdoutName = tname + outFileExtension;
    }

    File outf = new File(logDir, stdoutName);
    OutputStream fo = new BufferedOutputStream(new FileOutputStream(outf));
    if (qSortQuerySet.contains(tname)) {
      ss.out = new SortPrintStream(fo, "UTF-8");
    } else if (qHashQuerySet.contains(tname)) {
      ss.out = new DigestPrintStream(fo, "UTF-8");
    } else if (qSortNHashQuerySet.contains(tname)) {
      ss.out = new SortAndDigestPrintStream(fo, "UTF-8");
    } else {
      ss.out = new PrintStream(fo, true, "UTF-8");
    }
    ss.err = new CachingPrintStream(fo, true, "UTF-8");
    ss.setIsSilent(true);
    SessionState oldSs = SessionState.get();

    if (oldSs != null && clusterType == MiniClusterType.tez) {
      oldSs.close();
    }

    if (oldSs != null && oldSs.out != null && oldSs.out != System.out) {
      oldSs.out.close();
    }
    SessionState.start(ss);

    cliDriver = new CliDriver();
    cliDriver.processInitFiles(ss);

    return outf.getAbsolutePath();
  }
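
The SortPrintStream / DigestPrintStream / SortAndDigestPrintStream variants exist to make test output deterministic: queries whose row order is not guaranteed have their output sorted (or digested) before it is compared against the golden file. A rough illustration of the sorting idea in plain Java is shown below; the actual Hive test streams do this incrementally inside the stream rather than in a main method:

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.util.Arrays;

public class SortedOutputSketch {
  public static void main(String[] args) throws Exception {
    // Capture query output in memory first...
    ByteArrayOutputStream captured = new ByteArrayOutputStream();
    PrintStream out = new PrintStream(captured, true, "UTF-8");
    out.println("2\tbob");
    out.println("1\talice");
    out.flush();

    // ...then emit it sorted so the comparison is independent of row order.
    String[] lines = captured.toString("UTF-8").split("\n");
    Arrays.sort(lines);
    for (String line : lines) {
      System.out.println(line);
    }
  }
}
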
Example #13
  public int executeInProcess(DriverContext driverContext) {
    // check the local work
    if (work == null) {
      return -1;
    }

    if (execContext == null) {
      execContext = new ExecMapperContext(job);
    }

    memoryMXBean = ManagementFactory.getMemoryMXBean();
    long startTime = System.currentTimeMillis();
    console.printInfo(
        Utilities.now()
            + "\tStarting to launch local task to process map join;\tmaximum memory = "
            + memoryMXBean.getHeapMemoryUsage().getMax());
    execContext.setJc(job);
    // set the local work, so all the operators can get this context
    execContext.setLocalWork(work);
    try {
      startForward(null);
      long currentTime = System.currentTimeMillis();
      long elapsed = currentTime - startTime;
      console.printInfo(
          Utilities.now()
              + "\tEnd of local task; Time Taken: "
              + Utilities.showTime(elapsed)
              + " sec.");
    } catch (Throwable throwable) {
      if (throwable instanceof OutOfMemoryError
          || (throwable instanceof MapJoinMemoryExhaustionException)) {
        l4j.error("Hive Runtime Error: Map local work exhausted memory", throwable);
        return 3;
      } else {
        l4j.error("Hive Runtime Error: Map local work failed", throwable);
        return 2;
      }
    }
    return 0;
  }
Example #14
    /**
     * Separate from constructor, because initialize() may need to be called in a separate thread.
     */
    synchronized void initialize() {
      assertState(QueryState.CREATED);
      this.hiveConf = new HiveConf(Driver.class);

      // Update configuration with user/group info.
      if (query.hadoop_user == null) {
        throw new RuntimeException("User must be specified.");
      }

      // Update scratch dir (to have one per user)
      File scratchDir = new File("/tmp/hive-beeswax-" + query.hadoop_user);
      hiveConf.set(HiveConf.ConfVars.SCRATCHDIR.varname, scratchDir.getPath());
      // Create the temporary directory if necessary.
      // If mapred.job.tracker is set to local, this is used by MapRedTask.
      if (!scratchDir.isDirectory()) {
        if (scratchDir.exists() || !scratchDir.mkdirs()) {
          LOG.warn("Could not create tmp dir:" + scratchDir);
        }
      }

      driver = new Driver(hiveConf);
      ClassLoader loader = hiveConf.getClassLoader();
      String auxJars = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVEAUXJARS);
      if (StringUtils.isNotBlank(auxJars)) {
        try {
          loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","));
        } catch (Exception e) {
          LOG.error("Failed to add jars to class loader: " + auxJars, e);
        }
      }
      hiveConf.setClassLoader(loader);
      Thread.currentThread().setContextClassLoader(loader);
      SessionState.start(hiveConf); // this is thread-local
      this.sessionState = SessionState.get();

      // If this work has a LogContext, associate the child output with the logContext
      OutputStream lcOutStream = null;
      if (this.logContext != null) lcOutStream = this.logContext.getOutputStream();

      // A copy of everything goes to the LogContext.
      // In addition, stderr goes to errStream for error reporting.
      // Note that child output is explicitly tee'd to System.{out,err},
      // otherwise it'll be swallowed by outStream.
      this.sessionState.out = new PrintStream(new TeeOutputStream(lcOutStream, this.outStream));
      this.sessionState.err = new PrintStream(new TeeOutputStream(lcOutStream, this.errStream));
      this.sessionState.childOut =
          new PrintStream(new TeeOutputStream(System.out, sessionState.out));
      this.sessionState.childErr =
          new PrintStream(new TeeOutputStream(System.err, sessionState.err));

      this.state = QueryState.INITIALIZED;
    }
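
The TeeOutputStream used here duplicates every byte it receives into two underlying streams, which is how child output reaches both the console and the LogContext. A minimal stand-in in plain Java (the class in the snippet may be Commons IO's or Beeswax's own; this sketch only shows the idea and skips flush/close forwarding and the null-branch handling the snippet would need):

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;

public class TeeOutputStreamSketch extends OutputStream {
  private final OutputStream first;
  private final OutputStream second;

  public TeeOutputStreamSketch(OutputStream first, OutputStream second) {
    this.first = first;
    this.second = second;
  }

  @Override
  public void write(int b) throws IOException {
    // Every write goes to both branches.
    first.write(b);
    second.write(b);
  }

  public static void main(String[] args) throws IOException {
    // Hypothetical log path, standing in for the LogContext's output stream.
    try (OutputStream log = new FileOutputStream("/tmp/beeswax-sketch.log")) {
      PrintStream childOut = new PrintStream(new TeeOutputStreamSketch(System.out, log), true);
      childOut.println("goes to the console and to the log file");
    }
  }
}
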
Example #15
 public static boolean unregisterJar(String jarsToUnregister) {
   LogHelper console = getConsole();
   try {
     Utilities.removeFromClassPath(StringUtils.split(jarsToUnregister, ","));
     console.printInfo("Deleted " + jarsToUnregister + " from class path");
     return true;
   } catch (Exception e) {
     console.printError(
         "Unable to unregister " + jarsToUnregister + "\nException: " + e.getMessage(),
         "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
     return false;
   }
 }
 /**
  * get the final output path of a given FileOutputFormat.
  *
  * @param parent parent dir of the expected final output path
  * @param taskId id of the task whose output file is being resolved
  * @param jc job configuration
  * @param hiveOutputFormat output format used to write the data
  * @param isCompressed whether the output is compressed
  * @param defaultFinalPath path to return for output formats that need no special handling
  * @return the final output path for the task
  */
 public static Path getOutputFormatFinalPath(
     Path parent,
     String taskId,
     JobConf jc,
     HiveOutputFormat<?, ?> hiveOutputFormat,
     boolean isCompressed,
     Path defaultFinalPath)
     throws IOException {
   if (hiveOutputFormat instanceof HiveIgnoreKeyTextOutputFormat) {
     return new Path(parent, taskId + Utilities.getFileExtension(jc, isCompressed));
   }
   return defaultFinalPath;
 }
Example #17
 static void registerJars(List<String> newJars) throws IllegalArgumentException {
   LogHelper console = getConsole();
   try {
     ClassLoader loader = Thread.currentThread().getContextClassLoader();
     ClassLoader newLoader = Utilities.addToClassPath(loader, newJars.toArray(new String[0]));
     Thread.currentThread().setContextClassLoader(newLoader);
     SessionState.get().getConf().setClassLoader(newLoader);
     console.printInfo("Added " + newJars + " to class path");
   } catch (Exception e) {
     String message = "Unable to register " + newJars;
     throw new IllegalArgumentException(message, e);
   }
 }
Example #18
File: ATSHook.java  Project: apache/hive
  protected ExecutionMode getExecutionMode(QueryPlan plan) {
    int numMRJobs = Utilities.getMRTasks(plan.getRootTasks()).size();
    int numSparkJobs = Utilities.getSparkTasks(plan.getRootTasks()).size();
    int numTezJobs = Utilities.getTezTasks(plan.getRootTasks()).size();

    ExecutionMode mode = ExecutionMode.MR;
    if (0 == (numMRJobs + numSparkJobs + numTezJobs)) {
      mode = ExecutionMode.NONE;
    } else if (numSparkJobs > 0) {
      return ExecutionMode.SPARK;
    } else if (numTezJobs > 0) {
      mode = ExecutionMode.TEZ;
      // Need to go in and check if any of the tasks is running in LLAP mode.
      for (TezTask tezTask : Utilities.getTezTasks(plan.getRootTasks())) {
        if (tezTask.getWork().getLlapMode()) {
          mode = ExecutionMode.LLAP;
          break;
        }
      }
    }

    return mode;
  }
 private Object deserializeValue(BytesWritable valueWritable, byte tag) throws HiveException {
   try {
     return inputValueDeserializer[tag].deserialize(valueWritable);
   } catch (SerDeException e) {
     throw new HiveException(
         "Error: Unable to deserialize reduce input value (tag="
             + tag
             + ") from "
             + Utilities.formatBinaryString(valueWritable.getBytes(), 0, valueWritable.getLength())
             + " with properties "
             + valueTableDesc[tag].getProperties(),
         e);
   }
 }
Example #20
 public static boolean registerJar(String newJar) {
   LogHelper console = getConsole();
   try {
     ClassLoader loader = Thread.currentThread().getContextClassLoader();
     Thread.currentThread()
         .setContextClassLoader(Utilities.addToClassPath(loader, StringUtils.split(newJar, ",")));
     console.printInfo("Added " + newJar + " to class path");
     return true;
   } catch (Exception e) {
     console.printError(
         "Unable to register " + newJar + "\nException: " + e.getMessage(),
         "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
     return false;
   }
 }
Example #21
  static void validateFiles(List<String> newFiles) throws IllegalArgumentException {
    SessionState ss = SessionState.get();
    Configuration conf = (ss == null) ? new Configuration() : ss.getConf();

    for (String newFile : newFiles) {
      try {
        if (Utilities.realFile(newFile, conf) == null) {
          String message = newFile + " does not exist";
          throw new IllegalArgumentException(message);
        }
      } catch (IOException e) {
        String message = "Unable to validate " + newFile;
        throw new IllegalArgumentException(message, e);
      }
    }
  }
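
validateFiles leans on Utilities.realFile returning null when a resource cannot be resolved. The rough contract assumed here, sketched with the Hadoop FileSystem API (the real helper does additional qualification and checks that the path is a regular file):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RealFileSketch {
  // Assumed contract: resolve the name against its filesystem and return a usable
  // path string, or null when nothing exists at that location.
  static String realFile(String name, Configuration conf) throws IOException {
    Path path = new Path(name);
    FileSystem fs = path.getFileSystem(conf);
    return fs.exists(path) ? path.toString() : null;
  }

  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Assumes a Unix-like local filesystem.
    System.out.println(realFile("file:///etc/hosts", conf));     // the resolved path
    System.out.println(realFile("file:///no/such/file", conf));  // null
  }
}
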
Example #22
  /*
   * Creates the configuration object necessary to run a specific vertex from
   * map work. This includes input formats, input processor, etc.
   */
  private JobConf initializeVertexConf(JobConf baseConf, MapWork mapWork) {
    JobConf conf = new JobConf(baseConf);

    if (mapWork.getNumMapTasks() != null) {
      conf.setInt(MRJobConfig.NUM_MAPS, mapWork.getNumMapTasks().intValue());
    }

    if (mapWork.getMaxSplitSize() != null) {
      HiveConf.setLongVar(
          conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mapWork.getMaxSplitSize().longValue());
    }

    if (mapWork.getMinSplitSize() != null) {
      HiveConf.setLongVar(
          conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mapWork.getMinSplitSize().longValue());
    }

    if (mapWork.getMinSplitSizePerNode() != null) {
      HiveConf.setLongVar(
          conf,
          HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE,
          mapWork.getMinSplitSizePerNode().longValue());
    }

    if (mapWork.getMinSplitSizePerRack() != null) {
      HiveConf.setLongVar(
          conf,
          HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK,
          mapWork.getMinSplitSizePerRack().longValue());
    }

    Utilities.setInputAttributes(conf, mapWork);

    String inpFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT);
    if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) {
      inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName();
    }

    if (mapWork.isUseBucketizedHiveInputFormat()) {
      inpFormat = BucketizedHiveInputFormat.class.getName();
    }

    conf.set("mapred.mapper.class", ExecMapper.class.getName());
    conf.set("mapred.input.format.class", inpFormat);

    return conf;
  }
Example #23
  /**
   * Create a MapReduce job for a particular partition if Hadoop version is pre 0.20, otherwise
   * create a Map-only job using CombineHiveInputFormat for all partitions.
   *
   * @param fsOp The FileSink operator.
   * @param ctx The MR processing context.
   * @param finalName the final destination path the merge job should output.
   * @throws SemanticException
   */
  private void createMergeJob(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName)
      throws SemanticException {

    // if the hadoop version support CombineFileInputFormat (version >= 0.20),
    // create a Map-only job for merge, otherwise create a MapReduce merge job.
    ParseContext parseCtx = ctx.getParseCtx();
    HiveConf conf = parseCtx.getConf();
    if (conf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPONLY)
        && Utilities.supportCombineFileInputFormat()) {
      // create Map-only merge job
      createMap4Merge(fsOp, ctx, finalName);
      LOG.info("use CombineHiveInputformat for the merge job");
    } else {
      createMapReduce4Merge(fsOp, ctx, finalName);
      LOG.info("use HiveInputFormat for the merge job");
    }
  }
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LateralViewJoinOperator op = (LateralViewJoinOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      List<String> cols = cppCtx.genColLists(op);
      if (cols == null) {
        return null;
      }

      Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
      // As columns go down the DAG, the LVJ will transform internal column
      // names from something like 'key' to '_col0'. Because of this, we need
      // to undo this transformation using the column expression map as the
      // column names propagate up the DAG.

      // this is SEL(*) cols + UDTF cols
      List<String> outputCols = op.getConf().getOutputInternalColNames();

      // Because we cannot prune columns from the UDTF branch currently, extract
      // columns from the SEL(*) branch only and append all columns from the UDTF branch to it
      int numSelColumns = op.getConf().getNumSelColumns();

      List<String> colsAfterReplacement = new ArrayList<String>();
      ArrayList<String> newColNames = new ArrayList<String>();
      for (String col : cols) {
        int index = outputCols.indexOf(col);
        // colExprMap.size() == size of cols from SEL(*) branch
        if (index >= 0 && index < numSelColumns) {
          ExprNodeDesc transformed = colExprMap.get(col);
          Utilities.mergeUniqElems(colsAfterReplacement, transformed.getCols());
          newColNames.add(col);
        }
      }
      // update number of columns from sel(*)
      op.getConf().setNumSelColumns(newColNames.size());

      // add all UDTF columns
      // following SEL will do CP for columns from UDTF, not adding SEL in here
      newColNames.addAll(outputCols.subList(numSelColumns, outputCols.size()));
      op.getConf().setOutputInternalColNames(newColNames);
      pruneOperator(ctx, op, newColNames);
      cppCtx.getPrunedColLists().put(op, colsAfterReplacement);
      return null;
    }
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      FilterOperator op = (FilterOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      ExprNodeDesc condn = op.getConf().getPredicate();
      // get list of columns used in the filter
      List<String> cl = condn.getCols();
      // merge it with the downstream col list
      List<String> filterOpPrunedColLists = Utilities.mergeUniqElems(cppCtx.genColLists(op), cl);
      List<String> filterOpPrunedColListsOrderPreserved =
          preserveColumnOrder(op, filterOpPrunedColLists);
      cppCtx.getPrunedColLists().put(op, filterOpPrunedColListsOrderPreserved);

      pruneOperator(cppCtx, op, cppCtx.getPrunedColLists().get(op));

      return null;
    }
Example #26
    private Schema getSchema(InputSplit split, JobConf job) {
      // Inside of a MR job, we can pull out the actual properties
      if (AvroSerdeUtils.insideMRJob(job)) {
        MapWork mapWork = Utilities.getMapWork(job);

        // Iterate over the Path -> Partition descriptions to find the partition
        // that matches our input split.
        for (Map.Entry<String, PartitionDesc> pathsAndParts :
            mapWork.getPathToPartitionInfo().entrySet()) {
          String partitionPath = pathsAndParts.getKey();
          if (pathIsInPartition(((FileSplit) split).getPath(), partitionPath)) {
            if (LOG.isInfoEnabled()) {
              LOG.info("Matching partition " + partitionPath + " with input split " + split);
            }

            Properties props = pathsAndParts.getValue().getProperties();
            if (props.containsKey(AvroSerdeUtils.SCHEMA_LITERAL)
                || props.containsKey(AvroSerdeUtils.SCHEMA_URL)) {
              try {
                return AvroSerdeUtils.determineSchemaOrThrowException(props);
              } catch (Exception e) {
                throw new RuntimeException("Avro serde exception", e);
              }
            } else {
              return null; // If it's not in this property, it won't be in any others
            }
          }
        }
        if (LOG.isInfoEnabled()) {
          LOG.info("Unable to match filesplit " + split + " with a partition.");
        }
      }

      // In "select * from table" situations (non-MR), we can add things to the job
      // It's safe to add this to the job since it's not *actually* a mapred job.
      // Here the global state is confined to just this process.
      String s = job.get(AvroSerdeUtils.AVRO_SERDE_SCHEMA);
      if (s != null) {
        LOG.info("Found the avro schema in the job: " + s);
        return Schema.parse(s);
      }
      // No more places to get the schema from. Give up.  May have to re-encode later.
      return null;
    }
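
pathIsInPartition is the interesting predicate here: the split's path has to fall under the partition directory. A self-contained sketch of the prefix check this example relies on, including the trailing-separator guard that keeps ds=1 from matching ds=10 (illustrative only, the real helper works on Hadoop Path objects):

public class PathInPartitionSketch {
  // A split belongs to a partition when its path starts with the partition directory,
  // compared with a trailing separator so sibling directories do not match by prefix.
  static boolean pathIsInPartition(String splitPath, String partitionPath) {
    String prefix = partitionPath.endsWith("/") ? partitionPath : partitionPath + "/";
    return splitPath.startsWith(prefix);
  }

  public static void main(String[] args) {
    System.out.println(
        pathIsInPartition("/warehouse/t/ds=1/part-00000", "/warehouse/t/ds=1"));  // true
    System.out.println(
        pathIsInPartition("/warehouse/t/ds=10/part-00000", "/warehouse/t/ds=1")); // false
  }
}
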
Example #27
  @Override
  public RecordReader<LongWritable, MapWritable> getRecordReader(
      InputSplit split, JobConf conf, Reporter reporter) throws IOException {
    List<Integer> readColIDs = getReadColumnIDs(conf);

    boolean addAll = (readColIDs.size() == 0);

    String columnString = conf.get(ConfigurationUtil.COLUMN_MAPPING);
    if (StringUtils.isBlank(columnString)) {
      throw new IOException("no column mapping found!");
    }

    String[] columns = ConfigurationUtil.getAllColumns(columnString);
    if (readColIDs.size() > columns.length) {
      throw new IOException("read column count larger than that in column mapping string!");
    }

    String[] cols;
    if (addAll) {
      cols = columns;
    } else {
      cols = new String[readColIDs.size()];
      for (int i = 0; i < cols.length; i++) {
        cols[i] = columns[readColIDs.get(i)];
      }
    }
    String filterExprSerialized = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);

    if (filterExprSerialized != null) {
      ExprNodeDesc filterExpr = Utilities.deserializeExpression(filterExprSerialized, conf);
      /*String columnNameProperty = conf.get(
                  org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS);
      System.err.println("======list columns:" + columnNameProperty);*/
      dumpFilterExpr(filterExpr);
      // TODO:
    }

    return new SolrReader(
        ConfigurationUtil.getUrl(conf),
        (SolrSplit) split,
        cols,
        ConfigurationUtil.getNumInputBufferRows(conf));
  }
Example #28
  public static String validateFile(Set<String> curFiles, String newFile) {
    SessionState ss = SessionState.get();
    LogHelper console = getConsole();
    Configuration conf = (ss == null) ? new Configuration() : ss.getConf();

    try {
      if (Utilities.realFile(newFile, conf) != null) {
        return newFile;
      } else {
        console.printError(newFile + " does not exist");
        return null;
      }
    } catch (IOException e) {
      console.printError(
          "Unable to validate " + newFile + "\nException: " + e.getMessage(),
          "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return null;
    }
  }
Example #29
 public GenericUDAFEvaluator getGenericUDAFEvaluator() {
   if (genericUDAFEvaluator != null) {
     return genericUDAFEvaluator;
   }
   if (genericUDAFWritableEvaluator != null) {
     return genericUDAFEvaluator = genericUDAFWritableEvaluator;
   }
   try {
     return genericUDAFEvaluator =
         ReflectionUtils.newInstance(
             Class.forName(
                     genericUDAFEvaluatorClassName,
                     true,
                     Utilities.getSessionSpecifiedClassLoader())
                 .asSubclass(GenericUDAFEvaluator.class),
             null);
   } catch (ClassNotFoundException e) {
     throw new RuntimeException(e);
   }
 }
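
The evaluator above is materialized by combining Class.forName with an explicit classloader (the session-specified one, so classes registered via ADD JAR are visible) and ReflectionUtils.newInstance. The same pattern in plain reflection, as a hedged sketch; newInstance below is an invented helper, not Hadoop's ReflectionUtils:

public class ReflectiveEvaluatorSketch {
  // Resolve the class through a specific classloader, check the expected supertype,
  // then instantiate it via the no-arg constructor.
  static <T> T newInstance(String className, Class<T> expected, ClassLoader loader)
      throws ReflectiveOperationException {
    Class<? extends T> clazz = Class.forName(className, true, loader).asSubclass(expected);
    return clazz.getDeclaredConstructor().newInstance();
  }

  public static void main(String[] args) throws ReflectiveOperationException {
    ClassLoader loader = Thread.currentThread().getContextClassLoader();
    Runnable r = newInstance("java.lang.Thread", Runnable.class, loader);
    System.out.println("Instantiated: " + r.getClass().getName());
  }
}
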
  @Override
  public void close() {

    // No row was processed
    if (oc == null) {
      LOG.trace("Close called without any rows processed");
    }

    try {
      if (groupKey != null) {
        // If an operator wants to do some work at the end of a group
        LOG.trace("End Group");
        reducer.endGroup();
      }
      if (isLogInfoEnabled) {
        logCloseInfo();
      }

      reducer.close(abort);

      if (localWork != null) {
        for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) {
          dummyOp.close(abort);
        }
      }

      ReportStats rps = new ReportStats(rp, jc);
      reducer.preorderMap(rps);

    } catch (Exception e) {
      if (!abort) {
        // signal new failure to map-reduce
        LOG.error("Hit error while closing operators - failing tree");
        throw new RuntimeException(
            "Hive Runtime Error while closing operators: " + e.getMessage(), e);
      }
    } finally {
      MapredContext.close();
      Utilities.clearWorkMap();
    }
  }