public static void setupNeededColumns(TableScanOperator scanOp, RowResolver inputRR,
    List<String> cols) throws SemanticException {
  List<Integer> neededColumnIds = new ArrayList<Integer>();
  List<String> neededColumnNames = new ArrayList<String>();
  List<String> referencedColumnNames = new ArrayList<String>();
  TableScanDesc desc = scanOp.getConf();
  List<VirtualColumn> virtualCols = desc.getVirtualCols();
  List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();

  // add virtual columns for ANALYZE TABLE
  if (scanOp.getConf().isGatherStats()) {
    cols.add(VirtualColumn.RAWDATASIZE.getName());
  }

  for (String column : cols) {
    String[] tabCol = inputRR.reverseLookup(column);
    if (tabCol == null) {
      continue;
    }
    referencedColumnNames.add(column);
    ColumnInfo colInfo = inputRR.get(tabCol[0], tabCol[1]);
    if (colInfo.getIsVirtualCol()) {
      // part is also a virtual column, but part col should not be in this list.
      for (int j = 0; j < virtualCols.size(); j++) {
        VirtualColumn vc = virtualCols.get(j);
        if (vc.getName().equals(colInfo.getInternalName())) {
          newVirtualCols.add(vc);
        }
      }
      // no need to pass virtual columns to reader.
      continue;
    }
    int position = inputRR.getPosition(column);
    if (position >= 0) {
      // get the needed columns by id and name
      neededColumnIds.add(position);
      neededColumnNames.add(column);
    }
  }

  desc.setVirtualCols(newVirtualCols);
  scanOp.setNeededColumnIDs(neededColumnIds);
  scanOp.setNeededColumns(neededColumnNames);
  scanOp.setReferencedColumns(referencedColumnNames);
}
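// Illustrative sketch, not part of the original source: a standalone
// demonstration (assuming Hive's ql and serde jars are on the classpath) of
// the RowResolver lookups that setupNeededColumns() relies on. reverseLookup()
// maps an internal column name back to its (table alias, column alias) pair
// and returns null for unknown names, which the loop above skips; getPosition()
// yields the index that becomes the needed column id. The class name and the
// sample aliases below are hypothetical.
import java.util.Arrays;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class NeededColumnsSketch {
  public static void main(String[] args) {
    RowResolver rr = new RowResolver();
    rr.put("t", "key", new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "t", false));
    rr.put("t", "value", new ColumnInfo("_col1", TypeInfoFactory.stringTypeInfo, "t", false));

    System.out.println(Arrays.toString(rr.reverseLookup("_col1"))); // [t, value]
    System.out.println(rr.getPosition("_col1"));                    // 1
    System.out.println(rr.reverseLookup("_colX"));                  // null -> skipped
  }
}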
private MapOpCtx initObjectInspector(Configuration hconf, MapOpCtx opCtx,
    StructObjectInspector tableRowOI) throws Exception {
  PartitionDesc pd = opCtx.partDesc;
  TableDesc td = pd.getTableDesc();

  // Use table properties in case of unpartitioned tables,
  // and the union of table properties and partition properties, with partition
  // taking precedence, in the case of partitioned tables
  Properties overlayedProps =
      SerDeUtils.createOverlayedProperties(td.getProperties(), pd.getProperties());

  Map<String, String> partSpec = pd.getPartSpec();

  opCtx.tableName = String.valueOf(overlayedProps.getProperty("name"));
  opCtx.partName = String.valueOf(partSpec);
  opCtx.deserializer = pd.getDeserializer(hconf);

  StructObjectInspector partRawRowObjectInspector;
  if (Utilities.isInputFileFormatSelfDescribing(pd)) {
    partRawRowObjectInspector = tableRowOI;
  } else {
    partRawRowObjectInspector =
        (StructObjectInspector) opCtx.deserializer.getObjectInspector();
  }

  opCtx.partTblObjectInspectorConverter =
      ObjectInspectorConverters.getConverter(partRawRowObjectInspector, tableRowOI);

  // Next check if this table has partitions and if so
  // get the list of partition names as well as allocate
  // the serdes for the partition columns
  String pcols = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);

  if (pcols != null && pcols.length() > 0) {
    String[] partKeys = pcols.trim().split("/");
    String pcolTypes =
        overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
    String[] partKeyTypes = pcolTypes.trim().split(":");

    if (partKeys.length > partKeyTypes.length) {
      throw new HiveException("Internal error : partKeys length, " + partKeys.length +
          " greater than partKeyTypes length, " + partKeyTypes.length);
    }

    List<String> partNames = new ArrayList<String>(partKeys.length);
    Object[] partValues = new Object[partKeys.length];
    List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>(partKeys.length);

    for (int i = 0; i < partKeys.length; i++) {
      String key = partKeys[i];
      partNames.add(key);
      ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));

      // Partitions do not exist for this table
      if (partSpec == null) {
        // for partitionless table, initialize partValue to null
        partValues[i] = null;
      } else {
        partValues[i] = ObjectInspectorConverters.getConverter(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi)
            .convert(partSpec.get(key));
      }
      partObjectInspectors.add(oi);
    }
    opCtx.rowWithPart = new Object[] {null, partValues};
    opCtx.partObjectInspector =
        ObjectInspectorFactory.getStandardStructObjectInspector(partNames, partObjectInspectors);
  }

  // The op may not be a TableScan for mapjoins
  // Consider the query: select /*+MAPJOIN(a)*/ count(*) FROM T1 a JOIN T2 b ON a.key = b.key;
  // In that case, it will be a Select, but the rowOI need not be amended
  if (opCtx.op instanceof TableScanOperator) {
    TableScanOperator tsOp = (TableScanOperator) opCtx.op;
    TableScanDesc tsDesc = tsOp.getConf();
    if (tsDesc != null && tsDesc.hasVirtualCols()) {
      opCtx.vcs = tsDesc.getVirtualCols();
      opCtx.vcValues = new Object[opCtx.vcs.size()];
      opCtx.vcsObjectInspector = VirtualColumn.getVCSObjectInspector(opCtx.vcs);
      if (opCtx.isPartitioned()) {
        opCtx.rowWithPartAndVC = Arrays.copyOfRange(opCtx.rowWithPart, 0, 3);
      } else {
        opCtx.rowWithPartAndVC = new Object[2];
      }
    }
  }

  if (!opCtx.hasVC() && !opCtx.isPartitioned()) {
    opCtx.rowObjectInspector = tableRowOI;
    return opCtx;
  }

  List<StructObjectInspector> inspectors = new ArrayList<StructObjectInspector>();
  inspectors.add(tableRowOI);
  if (opCtx.isPartitioned()) {
    inspectors.add(opCtx.partObjectInspector);
  }
  if (opCtx.hasVC()) {
    inspectors.add(opCtx.vcsObjectInspector);
  }
  opCtx.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(inspectors);
  return opCtx;
}
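// Illustrative sketch, not part of the original source: a minimal, standalone
// example (assuming Hive's serde jar is on the classpath) of the
// partition-value conversion performed in the loop above. Partition specs
// carry values as strings; the converter turns the string into the writable
// matching the declared partition column type. The class name and the sample
// type/value are hypothetical.
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PartValueConversionSketch {
  public static void main(String[] args) {
    // Declared type as it would appear in META_TABLE_PARTITION_COLUMN_TYPES, e.g. "int".
    ObjectInspector oi =
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
            TypeInfoFactory.getPrimitiveTypeInfo("int"));

    // A partSpec value such as "2014" (a String) becomes an IntWritable holding 2014.
    Object partValue = ObjectInspectorConverters.getConverter(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi)
        .convert("2014");

    System.out.println(partValue.getClass().getSimpleName() + ": " + partValue);
  }
}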