예제 #1
0
  /**
   * Creates a {@code SolrReader} over the given split, restricted to the columns Hive asked for.
   *
   * <p>The projected column IDs come from {@code getReadColumnIDs(conf)}; an empty list means
   * every column of the mapping is read. Each ID is used as an index into the columns parsed
   * from the {@code ConfigurationUtil.COLUMN_MAPPING} setting.
   *
   * @param split the input split to read; it is cast to {@code SolrSplit}
   * @param conf job configuration holding the column mapping, URL and input buffer size
   * @param reporter progress reporter (not used by this method)
   * @return a reader over the selected columns of the split
   * @throws IOException if the column mapping is missing, if more columns are projected than
   *     the mapping contains, or if a projected column ID does not index into the mapping
   */
  @Override
  public RecordReader<LongWritable, MapWritable> getRecordReader(
      InputSplit split, JobConf conf, Reporter reporter) throws IOException {
    List<Integer> readColIDs = getReadColumnIDs(conf);

    String columnString = conf.get(ConfigurationUtil.COLUMN_MAPPING);
    if (StringUtils.isBlank(columnString)) {
      throw new IOException("no column mapping found!");
    }

    String[] columns = ConfigurationUtil.getAllColumns(columnString);
    if (readColIDs.size() > columns.length) {
      throw new IOException("read column count larger than that in column mapping string!");
    }

    String[] cols;
    if (readColIDs.isEmpty()) {
      // No explicit projection: read every mapped column.
      cols = columns;
    } else {
      cols = new String[readColIDs.size()];
      for (int i = 0; i < cols.length; i++) {
        int colId = readColIDs.get(i);
        // The count check above does not guarantee that each individual ID is a valid index,
        // so validate it here rather than failing with an unchecked array-index exception.
        if (colId < 0 || colId >= columns.length) {
          throw new IOException(
              "read column id " + colId + " is out of range for a column mapping with "
                  + columns.length + " columns");
        }
        cols[i] = columns[colId];
      }
    }

    String filterExprSerialized = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (filterExprSerialized != null) {
      ExprNodeDesc filterExpr = Utilities.deserializeExpression(filterExprSerialized, conf);
      dumpFilterExpr(filterExpr);
      // TODO: the pushed-down filter is only dumped for now; it is not passed to the reader.
    }

    return new SolrReader(
        ConfigurationUtil.getUrl(conf),
        (SolrSplit) split,
        cols,
        ConfigurationUtil.getNumInputBufferRows(conf));
  }
  /**
   * Converts a filter (which has been pushed down from Hive's optimizer) into corresponding
   * restrictions on the HBase scan. The filter should already be in a form which can be fully
   * converted.
   *
   * <p>A serialized filter object ({@code TableScanDesc.FILTER_OBJECT_CONF_STR}) takes
   * precedence: when present it sets up the scan itself and the filter expression is not
   * consulted. Otherwise the filter expression
   * ({@code TableScanDesc.FILTER_EXPR_CONF_STR}) is analyzed for conditions on the key column,
   * and those conditions are translated into the scan's start and stop rows.
   *
   * @param jobConf configuration for the scan
   * @param iKey 0-based offset of key column within Hive table
   * @param isKeyBinary forwarded to the predicate analyzer and to the constant conversion;
   *     presumably whether the row key is stored in binary form (TODO confirm)
   * @return the scan; unrestricted if neither a filter object nor a filter expression is set
   * @throws IOException if the filter object fails to set up the scan, a constant cannot be
   *     evaluated or is not of a primitive type, or a comparison operator is not supported
   */
  private Scan createFilterScan(JobConf jobConf, int iKey, boolean isKeyBinary) throws IOException {

    // TODO: assert iKey is HBaseSerDe#HBASE_KEY_COL

    Scan scan = new Scan();
    String filterObjectSerialized = jobConf.get(TableScanDesc.FILTER_OBJECT_CONF_STR);
    if (filterObjectSerialized != null) {
      HBaseScanRange range =
          Utilities.deserializeObject(filterObjectSerialized, HBaseScanRange.class);
      try {
        range.setup(scan, jobConf);
      } catch (Exception e) {
        throw new IOException(e);
      }
      return scan;
    }

    String filterExprSerialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (filterExprSerialized == null) {
      return scan;
    }
    ExprNodeGenericFuncDesc filterExpr = Utilities.deserializeExpression(filterExprSerialized);

    // NOTE(review): the type list is split on ','; presumably this misindexes when a type name
    // itself contains a comma (e.g. map<string,int>) -- confirm against the serde's format.
    String colName = jobConf.get(serdeConstants.LIST_COLUMNS).split(",")[iKey];
    String colType = jobConf.get(serdeConstants.LIST_COLUMN_TYPES).split(",")[iKey];
    IndexPredicateAnalyzer analyzer = newIndexPredicateAnalyzer(colName, colType, isKeyBinary);

    List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>();
    ExprNodeDesc residualPredicate = analyzer.analyzePredicate(filterExpr, searchConditions);

    // There should be no residual since we already negotiated that earlier in
    // HBaseStorageHandler.decomposePredicate. However, with hive.optimize.index.filter
    // OpProcFactory#pushFilterToStorageHandler pushes the original filter back down again.
    // Since pushed-down filters are not omitted at the higher levels (and thus the
    // contract of negotiation is ignored anyway), just ignore the residuals.
    // Re-assess this when negotiation is honored and the duplicate evaluation is removed.
    // THIS IGNORES RESIDUAL PARSING FROM HBaseStorageHandler#decomposePredicate
    if (residualPredicate != null && LOG.isDebugEnabled()) {
      LOG.debug("Ignoring residual predicate " + residualPredicate.getExprString());
    }

    // Convert the search conditions into a restriction on the HBase scan. Conditions are applied
    // in order, so a later condition overwrites a bound set by an earlier one.
    byte[] startRow = HConstants.EMPTY_START_ROW;
    byte[] stopRow = HConstants.EMPTY_END_ROW;
    for (IndexSearchCondition sc : searchConditions) {

      ExprNodeConstantEvaluator eval = new ExprNodeConstantEvaluator(sc.getConstantDesc());
      PrimitiveObjectInspector objInspector;
      Object writable;

      try {
        objInspector = (PrimitiveObjectInspector) eval.initialize(null);
        writable = eval.evaluate(null);
      } catch (ClassCastException cce) {
        // Keep the original exception as the cause so the failing cast remains diagnosable.
        throw new IOException(
            "Currently only primitve types are supported. Found: "
                + sc.getConstantDesc().getTypeString(),
            cce);
      } catch (HiveException e) {
        throw new IOException(e);
      }

      byte[] constantVal = getConstantVal(writable, objInspector, isKeyBinary);
      String comparisonOp = sc.getComparisonOp();

      // The scan's start row is inclusive and its stop row exclusive, so bounds that must
      // include (stop) or exclude (start) the constant use getNextBA(constantVal) --
      // presumably the next byte array after the constant (TODO confirm helper semantics).
      if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual".equals(comparisonOp)) {
        startRow = constantVal;
        stopRow = getNextBA(constantVal);
      } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan"
          .equals(comparisonOp)) {
        stopRow = constantVal;
      } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan"
          .equals(comparisonOp)) {
        startRow = constantVal;
      } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan"
          .equals(comparisonOp)) {
        startRow = getNextBA(constantVal);
      } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan"
          .equals(comparisonOp)) {
        stopRow = getNextBA(constantVal);
      } else {
        throw new IOException(comparisonOp + " is not a supported comparison operator");
      }
    }
    scan.setStartRow(startRow);
    scan.setStopRow(stopRow);

    if (LOG.isDebugEnabled()) {
      LOG.debug(Bytes.toStringBinary(startRow) + " ~ " + Bytes.toStringBinary(stopRow));
    }
    return scan;
  }