/**
 * Creates a predicate analyzer preconfigured with the comparison operators we
 * support for pushdown.
 *
 * @param equalOnly when true, only the equality operator is registered
 * @return the configured analyzer
 */
public static IndexPredicateAnalyzer createAnalyzer(boolean equalOnly) {
  IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
  // Equality is always supported.
  analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual");
  if (!equalOnly) {
    // Register the full set of range comparison operators as well.
    final String udfPackage = "org.apache.hadoop.hive.ql.udf.generic.";
    for (String udfName : new String[] {
        "GenericUDFOPEqualOrGreaterThan",
        "GenericUDFOPEqualOrLessThan",
        "GenericUDFOPLessThan",
        "GenericUDFOPGreaterThan"}) {
      analyzer.addComparisonOp(udfPackage + udfName);
    }
  }
  return analyzer;
}
/** * Instantiates a new predicate analyzer suitable for determining how to push a filter down into * the HBase scan, based on the rules for what kinds of pushdown we currently support. * * @param keyColumnName name of the Hive column mapped to the HBase row key * @return preconfigured predicate analyzer */ static IndexPredicateAnalyzer newIndexPredicateAnalyzer( String keyColumnName, String keyColType, boolean isKeyBinary) { IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer(); // We can always do equality predicate. Just need to make sure we get appropriate // BA representation of constant of filter condition. analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual"); // We can do other comparisons only if storage format in hbase is either binary // or we are dealing with string types since there lexographic ordering will suffice. if (isKeyBinary || (keyColType.equalsIgnoreCase("string"))) { analyzer.addComparisonOp( "org.apache.hadoop.hive.ql.udf.generic." + "GenericUDFOPEqualOrGreaterThan"); analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan"); analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan"); analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan"); } // and only on the key column analyzer.clearAllowedColumnNames(); analyzer.allowColumnName(keyColumnName); return analyzer; }
/** * Converts a filter (which has been pushed down from Hive's optimizer) into corresponding * restrictions on the HBase scan. The filter should already be in a form which can be fully * converted. * * @param jobConf configuration for the scan * @param iKey 0-based offset of key column within Hive table * @return converted table split if any */ private Scan createFilterScan(JobConf jobConf, int iKey, boolean isKeyBinary) throws IOException { // TODO: assert iKey is HBaseSerDe#HBASE_KEY_COL Scan scan = new Scan(); String filterObjectSerialized = jobConf.get(TableScanDesc.FILTER_OBJECT_CONF_STR); if (filterObjectSerialized != null) { HBaseScanRange range = Utilities.deserializeObject(filterObjectSerialized, HBaseScanRange.class); try { range.setup(scan, jobConf); } catch (Exception e) { throw new IOException(e); } return scan; } String filterExprSerialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR); if (filterExprSerialized == null) { return scan; } ExprNodeGenericFuncDesc filterExpr = Utilities.deserializeExpression(filterExprSerialized); String colName = jobConf.get(serdeConstants.LIST_COLUMNS).split(",")[iKey]; String colType = jobConf.get(serdeConstants.LIST_COLUMN_TYPES).split(",")[iKey]; IndexPredicateAnalyzer analyzer = newIndexPredicateAnalyzer(colName, colType, isKeyBinary); List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>(); ExprNodeDesc residualPredicate = analyzer.analyzePredicate(filterExpr, searchConditions); // There should be no residual since we already negotiated that earlier in // HBaseStorageHandler.decomposePredicate. However, with hive.optimize.index.filter // OpProcFactory#pushFilterToStorageHandler pushes the original filter back down again. // Since pushed-down filters are not ommitted at the higher levels (and thus the // contract of negotiation is ignored anyway), just ignore the residuals. // Re-assess this when negotiation is honored and the duplicate evaluation is removed. 
// THIS IGNORES RESIDUAL PARSING FROM HBaseStorageHandler#decomposePredicate if (residualPredicate != null) { LOG.debug("Ignoring residual predicate " + residualPredicate.getExprString()); } // Convert the search condition into a restriction on the HBase scan byte[] startRow = HConstants.EMPTY_START_ROW, stopRow = HConstants.EMPTY_END_ROW; for (IndexSearchCondition sc : searchConditions) { ExprNodeConstantEvaluator eval = new ExprNodeConstantEvaluator(sc.getConstantDesc()); PrimitiveObjectInspector objInspector; Object writable; try { objInspector = (PrimitiveObjectInspector) eval.initialize(null); writable = eval.evaluate(null); } catch (ClassCastException cce) { throw new IOException( "Currently only primitve types are supported. Found: " + sc.getConstantDesc().getTypeString()); } catch (HiveException e) { throw new IOException(e); } byte[] constantVal = getConstantVal(writable, objInspector, isKeyBinary); String comparisonOp = sc.getComparisonOp(); if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual".equals(comparisonOp)) { startRow = constantVal; stopRow = getNextBA(constantVal); } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan" .equals(comparisonOp)) { stopRow = constantVal; } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan" .equals(comparisonOp)) { startRow = constantVal; } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan" .equals(comparisonOp)) { startRow = getNextBA(constantVal); } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan" .equals(comparisonOp)) { stopRow = getNextBA(constantVal); } else { throw new IOException(comparisonOp + " is not a supported comparison operator"); } } scan.setStartRow(startRow); scan.setStopRow(stopRow); if (LOG.isDebugEnabled()) { LOG.debug(Bytes.toStringBinary(startRow) + " ~ " + Bytes.toStringBinary(stopRow)); } return scan; }