@Override
public RecordReader<LongWritable, MapWritable> getRecordReader(
    InputSplit split, JobConf conf, Reporter reporter) throws IOException {

  // Column IDs pushed down by Hive's column pruner; an empty list means "read all columns".
  List<Integer> readColIDs = getReadColumnIDs(conf);
  boolean addAll = (readColIDs.size() == 0);

  String columnString = conf.get(ConfigurationUtil.COLUMN_MAPPING);
  if (StringUtils.isBlank(columnString)) {
    throw new IOException("no column mapping found!");
  }

  String[] columns = ConfigurationUtil.getAllColumns(columnString);
  if (readColIDs.size() > columns.length) {
    throw new IOException("read column count larger than that in column mapping string!");
  }

  // Resolve the projected column IDs against the configured column mapping.
  String[] cols;
  if (addAll) {
    cols = columns;
  } else {
    cols = new String[readColIDs.size()];
    for (int i = 0; i < cols.length; i++) {
      cols[i] = columns[readColIDs.get(i)];
    }
  }

  String filterExprSerialized = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (filterExprSerialized != null) {
    ExprNodeDesc filterExpr = Utilities.deserializeExpression(filterExprSerialized, conf);
    /*
    String columnNameProperty = conf.get(org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS);
    System.err.println("======list columns:" + columnNameProperty);
    */
    dumpFilterExpr(filterExpr);
    // TODO:
  }

  return new SolrReader(
      ConfigurationUtil.getUrl(conf),
      (SolrSplit) split,
      cols,
      ConfigurationUtil.getNumInputBufferRows(conf));
}
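// dumpFilterExpr above is a local debug helper whose body is not shown here. A minimal
// sketch of what such a helper might look like, assuming it only logs the pushed-down
// expression tree; the method itself is an assumption, while ExprNodeDesc#getExprString
// and ExprNodeDesc#getChildren are real Hive APIs.
private void dumpFilterExpr(ExprNodeDesc expr) {
  dumpFilterExpr(expr, 0);
}

private void dumpFilterExpr(ExprNodeDesc expr, int depth) {
  if (expr == null) {
    return;
  }
  StringBuilder indent = new StringBuilder();
  for (int i = 0; i < depth; i++) {
    indent.append("  ");
  }
  // getExprString() renders the node, e.g. "(id = 42)"; children are its operands.
  System.err.println(indent.toString() + expr.getExprString());
  List<ExprNodeDesc> children = expr.getChildren();
  if (children != null) {
    for (ExprNodeDesc child : children) {
      dumpFilterExpr(child, depth + 1);
    }
  }
}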
/**
 * Converts a filter (which has been pushed down from Hive's optimizer) into corresponding
 * restrictions on the HBase scan. The filter should already be in a form which can be fully
 * converted.
 *
 * @param jobConf configuration for the scan
 * @param iKey 0-based offset of key column within Hive table
 * @return the scan restricted by the converted filter, if any
 */
private Scan createFilterScan(JobConf jobConf, int iKey, boolean isKeyBinary)
    throws IOException {

  // TODO: assert iKey is HBaseSerDe#HBASE_KEY_COL

  Scan scan = new Scan();
  String filterObjectSerialized = jobConf.get(TableScanDesc.FILTER_OBJECT_CONF_STR);
  if (filterObjectSerialized != null) {
    HBaseScanRange range =
        Utilities.deserializeObject(filterObjectSerialized, HBaseScanRange.class);
    try {
      range.setup(scan, jobConf);
    } catch (Exception e) {
      throw new IOException(e);
    }
    return scan;
  }

  String filterExprSerialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (filterExprSerialized == null) {
    return scan;
  }

  ExprNodeGenericFuncDesc filterExpr = Utilities.deserializeExpression(filterExprSerialized);

  String colName = jobConf.get(serdeConstants.LIST_COLUMNS).split(",")[iKey];
  String colType = jobConf.get(serdeConstants.LIST_COLUMN_TYPES).split(",")[iKey];
  IndexPredicateAnalyzer analyzer = newIndexPredicateAnalyzer(colName, colType, isKeyBinary);

  List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>();
  ExprNodeDesc residualPredicate = analyzer.analyzePredicate(filterExpr, searchConditions);

  // There should be no residual, since we already negotiated that earlier in
  // HBaseStorageHandler.decomposePredicate. However, with hive.optimize.index.filter,
  // OpProcFactory#pushFilterToStorageHandler pushes the original filter back down again.
  // Since pushed-down filters are not omitted at the higher levels (and thus the
  // contract of negotiation is ignored anyway), just ignore the residuals.
  // Re-assess this when negotiation is honored and the duplicate evaluation is removed.
  // THIS IGNORES RESIDUAL PARSING FROM HBaseStorageHandler#decomposePredicate
  if (residualPredicate != null) {
    LOG.debug("Ignoring residual predicate " + residualPredicate.getExprString());
  }

  // Convert the search conditions into a restriction on the HBase scan.
  byte[] startRow = HConstants.EMPTY_START_ROW, stopRow = HConstants.EMPTY_END_ROW;
  for (IndexSearchCondition sc : searchConditions) {

    ExprNodeConstantEvaluator eval = new ExprNodeConstantEvaluator(sc.getConstantDesc());
    PrimitiveObjectInspector objInspector;
    Object writable;

    try {
      objInspector = (PrimitiveObjectInspector) eval.initialize(null);
      writable = eval.evaluate(null);
    } catch (ClassCastException cce) {
      throw new IOException("Currently only primitive types are supported. Found: "
          + sc.getConstantDesc().getTypeString());
    } catch (HiveException e) {
      throw new IOException(e);
    }

    byte[] constantVal = getConstantVal(writable, objInspector, isKeyBinary);
    String comparisonOp = sc.getComparisonOp();

    // Map each comparison operator onto an inclusive startRow / exclusive stopRow.
    if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual".equals(comparisonOp)) {
      startRow = constantVal;
      stopRow = getNextBA(constantVal);
    } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan"
        .equals(comparisonOp)) {
      stopRow = constantVal;
    } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan"
        .equals(comparisonOp)) {
      startRow = constantVal;
    } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan"
        .equals(comparisonOp)) {
      startRow = getNextBA(constantVal);
    } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan"
        .equals(comparisonOp)) {
      stopRow = getNextBA(constantVal);
    } else {
      throw new IOException(comparisonOp + " is not a supported comparison operator");
    }
  }

  scan.setStartRow(startRow);
  scan.setStopRow(stopRow);

  if (LOG.isDebugEnabled()) {
    LOG.debug(Bytes.toStringBinary(startRow) + " ~ " + Bytes.toStringBinary(stopRow));
  }

  return scan;
}