public static void setupNeededColumns(
    TableScanOperator scanOp, RowResolver inputRR, List<String> cols)
    throws SemanticException {
  List<Integer> neededColumnIds = new ArrayList<Integer>();
  List<String> neededColumnNames = new ArrayList<String>();
  List<String> referencedColumnNames = new ArrayList<String>();
  TableScanDesc desc = scanOp.getConf();
  List<VirtualColumn> virtualCols = desc.getVirtualCols();
  List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();

  // add virtual columns for ANALYZE TABLE
  if (scanOp.getConf().isGatherStats()) {
    cols.add(VirtualColumn.RAWDATASIZE.getName());
  }

  for (String column : cols) {
    String[] tabCol = inputRR.reverseLookup(column);
    if (tabCol == null) {
      continue;
    }
    referencedColumnNames.add(column);
    ColumnInfo colInfo = inputRR.get(tabCol[0], tabCol[1]);
    if (colInfo.getIsVirtualCol()) {
      // part is also a virtual column, but partition columns should not be in this list
      for (int j = 0; j < virtualCols.size(); j++) {
        VirtualColumn vc = virtualCols.get(j);
        if (vc.getName().equals(colInfo.getInternalName())) {
          newVirtualCols.add(vc);
        }
      }
      // no need to pass virtual columns to the reader
      continue;
    }
    int position = inputRR.getPosition(column);
    if (position >= 0) {
      // get the needed columns by id and name
      neededColumnIds.add(position);
      neededColumnNames.add(column);
    }
  }

  desc.setVirtualCols(newVirtualCols);
  scanOp.setNeededColumnIDs(neededColumnIds);
  scanOp.setNeededColumns(neededColumnNames);
  scanOp.setReferencedColumns(referencedColumnNames);
}
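// A minimal usage sketch, not part of the original code: after column pruning has computed
// the surviving column names, this call records the needed column ids/names on the scan
// operator and drops virtual columns that nobody references. The names scanOp and inputRR
// stand for the TableScanOperator and its RowResolver assumed to be in scope; note that the
// cols list must be mutable, since RAWDATASIZE may be appended for ANALYZE TABLE.
//
//   List<String> prunedCols = new ArrayList<String>(Arrays.asList("key", "value"));
//   setupNeededColumns(scanOp, inputRR, prunedCols);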
public static Object[] populateVirtualColumnValues(
    ExecMapperContext ctx, List<VirtualColumn> vcs, Object[] vcValues, Deserializer deserializer) {
  if (vcs == null) {
    return vcValues;
  }
  if (vcValues == null) {
    vcValues = new Object[vcs.size()];
  }
  for (int i = 0; i < vcs.size(); i++) {
    VirtualColumn vc = vcs.get(i);
    if (vc.equals(VirtualColumn.FILENAME)) {
      if (ctx.inputFileChanged()) {
        vcValues[i] = new Text(ctx.getCurrentInputPath().toString());
      }
    } else if (vc.equals(VirtualColumn.BLOCKOFFSET)) {
      long current = ctx.getIoCxt().getCurrentBlockStart();
      LongWritable old = (LongWritable) vcValues[i];
      if (old == null) {
        old = new LongWritable(current);
        vcValues[i] = old;
        continue;
      }
      if (current != old.get()) {
        old.set(current);
      }
    } else if (vc.equals(VirtualColumn.ROWOFFSET)) {
      long current = ctx.getIoCxt().getCurrentRow();
      LongWritable old = (LongWritable) vcValues[i];
      if (old == null) {
        old = new LongWritable(current);
        vcValues[i] = old;
        continue;
      }
      if (current != old.get()) {
        old.set(current);
      }
    } else if (vc.equals(VirtualColumn.RAWDATASIZE)) {
      long current = 0L;
      SerDeStats stats = deserializer.getSerDeStats();
      if (stats != null) {
        current = stats.getRawDataSize();
      }
      LongWritable old = (LongWritable) vcValues[i];
      if (old == null) {
        old = new LongWritable(current);
        vcValues[i] = old;
        continue;
      }
      if (current != old.get()) {
        old.set(current);
      }
    } else if (vc.equals(VirtualColumn.ROWID)) {
      if (ctx.getIoCxt().getRecordIdentifier() == null) {
        vcValues[i] = null;
      } else {
        if (vcValues[i] == null) {
          vcValues[i] = new Object[RecordIdentifier.Field.values().length];
        }
        RecordIdentifier.StructInfo.toArray(
            ctx.getIoCxt().getRecordIdentifier(), (Object[]) vcValues[i]);
        // so we don't accidentally cache the value; shouldn't happen since the IO layer
        // either knows how to produce ROW__ID or not - but to be safe
        ctx.getIoCxt().setRecordIdentifier(null);
      }
    }
  }
  return vcValues;
}
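// The three LongWritable-valued branches above (BLOCKOFFSET, ROWOFFSET, RAWDATASIZE) repeat
// the same reuse pattern: allocate the writable once per slot, then mutate it in place on
// subsequent rows to avoid per-row allocations. Below is a minimal sketch of a helper that
// would factor this out; the name setLongValue is hypothetical and not part of the original
// code.
private static void setLongValue(Object[] vcValues, int index, long current) {
  LongWritable old = (LongWritable) vcValues[index];
  if (old == null) {
    // first row for this slot: allocate and cache the writable
    vcValues[index] = new LongWritable(current);
  } else if (current != old.get()) {
    // later rows: mutate the cached writable in place
    old.set(current);
  }
}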
private MapOpCtx initObjectInspector(
    Configuration hconf, MapOpCtx opCtx, StructObjectInspector tableRowOI) throws Exception {
  PartitionDesc pd = opCtx.partDesc;
  TableDesc td = pd.getTableDesc();

  // Use the table properties in the case of unpartitioned tables, and the union of table
  // properties and partition properties, with partition properties taking precedence, in
  // the case of partitioned tables.
  Properties overlayedProps =
      SerDeUtils.createOverlayedProperties(td.getProperties(), pd.getProperties());

  Map<String, String> partSpec = pd.getPartSpec();

  opCtx.tableName = String.valueOf(overlayedProps.getProperty("name"));
  opCtx.partName = String.valueOf(partSpec);
  opCtx.deserializer = pd.getDeserializer(hconf);

  StructObjectInspector partRawRowObjectInspector;
  if (Utilities.isInputFileFormatSelfDescribing(pd)) {
    partRawRowObjectInspector = tableRowOI;
  } else {
    partRawRowObjectInspector = (StructObjectInspector) opCtx.deserializer.getObjectInspector();
  }

  opCtx.partTblObjectInspectorConverter =
      ObjectInspectorConverters.getConverter(partRawRowObjectInspector, tableRowOI);

  // Next, check if this table has partitions; if so, get the list of partition names and
  // allocate the object inspectors for the partition columns.
  String pcols = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
  if (pcols != null && pcols.length() > 0) {
    String[] partKeys = pcols.trim().split("/");
    String pcolTypes =
        overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
    String[] partKeyTypes = pcolTypes.trim().split(":");

    if (partKeys.length > partKeyTypes.length) {
      throw new HiveException(
          "Internal error: partKeys length, " + partKeys.length
              + ", greater than partKeyTypes length, " + partKeyTypes.length);
    }

    List<String> partNames = new ArrayList<String>(partKeys.length);
    Object[] partValues = new Object[partKeys.length];
    List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>(partKeys.length);

    for (int i = 0; i < partKeys.length; i++) {
      String key = partKeys[i];
      partNames.add(key);
      ObjectInspector oi =
          PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
              TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));

      if (partSpec == null) {
        // Partitions do not exist for this table: initialize the partition value to null.
        partValues[i] = null;
      } else {
        partValues[i] =
            ObjectInspectorConverters.getConverter(
                    PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi)
                .convert(partSpec.get(key));
      }
      partObjectInspectors.add(oi);
    }
    // slot 0 holds the row object, filled in per record
    opCtx.rowWithPart = new Object[] {null, partValues};
    opCtx.partObjectInspector =
        ObjectInspectorFactory.getStandardStructObjectInspector(partNames, partObjectInspectors);
  }

  // The op may not be a TableScan for map joins.
  // Consider the query: select /*+MAPJOIN(a)*/ count(*) FROM T1 a JOIN T2 b ON a.key = b.key;
  // In that case it will be a Select, and the rowOI need not be amended.
  if (opCtx.op instanceof TableScanOperator) {
    TableScanOperator tsOp = (TableScanOperator) opCtx.op;
    TableScanDesc tsDesc = tsOp.getConf();
    if (tsDesc != null && tsDesc.hasVirtualCols()) {
      opCtx.vcs = tsDesc.getVirtualCols();
      opCtx.vcValues = new Object[opCtx.vcs.size()];
      opCtx.vcsObjectInspector = VirtualColumn.getVCSObjectInspector(opCtx.vcs);
      if (opCtx.isPartitioned()) {
        // length 3: {row, partValues, vcValues}; copyOfRange pads the extra slot with null
        opCtx.rowWithPartAndVC = Arrays.copyOfRange(opCtx.rowWithPart, 0, 3);
      } else {
        // length 2: {row, vcValues}
        opCtx.rowWithPartAndVC = new Object[2];
      }
    }
  }

  if (!opCtx.hasVC() && !opCtx.isPartitioned()) {
    opCtx.rowObjectInspector = tableRowOI;
    return opCtx;
  }

  List<StructObjectInspector> inspectors = new ArrayList<StructObjectInspector>();
  inspectors.add(tableRowOI);
  if (opCtx.isPartitioned()) {
    inspectors.add(opCtx.partObjectInspector);
  }
  if (opCtx.hasVC()) {
    inspectors.add(opCtx.vcsObjectInspector);
  }
  opCtx.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(inspectors);
  return opCtx;
}
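// A minimal debugging sketch, not in the original code: prints the flattened field list of
// the row inspector built above. With the union-struct inspector, fields appear in source
// order: table columns first, then partition columns, then virtual columns. The helper name
// dumpRowFields and the use of System.out are illustrative only; StructField comes from the
// usual org.apache.hadoop.hive.serde2.objectinspector imports.
private static void dumpRowFields(StructObjectInspector rowOI) {
  for (StructField field : rowOI.getAllStructFieldRefs()) {
    // e.g. "ds : string" for a partition column, "block__offset__inside__file : bigint" for a VC
    System.out.println(field.getFieldName() + " : "
        + field.getFieldObjectInspector().getTypeName());
  }
}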