/**
   * Works out which columns a table scan must read and records the result on the operator.
   *
   * <p>Each name in {@code cols} that {@code inputRR} can reverse-look-up is recorded as a
   * referenced column. A name that resolves to a virtual column is matched, by internal name,
   * against the virtual columns currently held by the scan descriptor; the matches become the
   * descriptor's new virtual-column list and are not added to the needed ids/names. Any other
   * name with a non-negative position in {@code inputRR} is recorded as a needed column id and
   * name.
   *
   * <p>Side effect on the argument: when the scan descriptor reports that stats are being
   * gathered, the name of {@code VirtualColumn.RAWDATASIZE} is appended to {@code cols} itself.
   *
   * @param scanOp the table scan operator whose descriptor and needed/referenced columns are set
   * @param inputRR the row resolver used to resolve the column names
   * @param cols candidate column names; may be appended to (see above)
   * @throws SemanticException declared by this method; presumably raised by the row resolver
   *     lookups (not visible here)
   */
public static void setupNeededColumns(
      TableScanOperator scanOp, RowResolver inputRR, List<String> cols) throws SemanticException {
    TableScanDesc scanDesc = scanOp.getConf();
    List<VirtualColumn> currentVirtualCols = scanDesc.getVirtualCols();

    List<Integer> ids = new ArrayList<Integer>();
    List<String> names = new ArrayList<String>();
    List<String> referenced = new ArrayList<String>();
    List<VirtualColumn> keptVirtualCols = new ArrayList<VirtualColumn>();

    // ANALYZE TABLE needs the raw-data-size virtual column in addition to what was requested.
    if (scanOp.getConf().isGatherStats()) {
      cols.add(VirtualColumn.RAWDATASIZE.getName());
    }

    for (String colName : cols) {
      String[] tableAndColumn = inputRR.reverseLookup(colName);
      if (tableAndColumn == null) {
        // Unknown to the resolver: nothing to record.
        continue;
      }
      referenced.add(colName);

      ColumnInfo info = inputRR.get(tableAndColumn[0], tableAndColumn[1]);
      if (!info.getIsVirtualCol()) {
        // Regular column: remember its id and name when the resolver knows its position.
        int pos = inputRR.getPosition(colName);
        if (pos >= 0) {
          ids.add(pos);
          names.add(colName);
        }
        continue;
      }

      // Virtual column: keep only those the descriptor already lists. Partition columns are
      // also flagged virtual but are not in that list, so they drop out here. Virtual columns
      // are never handed to the reader as needed columns.
      for (VirtualColumn candidate : currentVirtualCols) {
        if (candidate.getName().equals(info.getInternalName())) {
          keptVirtualCols.add(candidate);
        }
      }
    }

    scanDesc.setVirtualCols(keptVirtualCols);
    scanOp.setNeededColumnIDs(ids);
    scanOp.setNeededColumns(names);
    scanOp.setReferencedColumns(referenced);
  }
Пример #2
0
 /**
  * Refreshes the values of the given virtual columns, one array slot per column.
  *
  * <ul>
  *   <li>FILENAME: replaced with a new {@code Text} of the current input path, but only when
  *       the context reports that the input file changed.
  *   <li>BLOCKOFFSET / ROWOFFSET / RAWDATASIZE: kept in a {@code LongWritable} that is created
  *       on first use and updated in place afterwards. RAWDATASIZE is 0 when the deserializer
  *       returns no stats.
  *   <li>ROWID: set to {@code null} when the IO context has no record identifier; otherwise
  *       the identifier is unpacked into an {@code Object[]} slot (allocated on first use) and
  *       the identifier on the IO context is then cleared.
  * </ul>
  *
  * Columns matching none of the above leave their slot untouched.
  *
  * @param ctx mapper context supplying the input path and IO context
  * @param vcs virtual columns to populate; when {@code null} nothing is done
  * @param vcValues array to fill, reused across calls; when {@code null} a new array of
  *     {@code vcs.size()} slots is allocated
  * @param deserializer source of the stats used for RAWDATASIZE
  * @return {@code vcValues} as passed in when {@code vcs} is {@code null}; otherwise the filled
  *     array ({@code vcValues} itself, or the newly allocated one)
  */
 public static Object[] populateVirtualColumnValues(
     ExecMapperContext ctx,
     List<VirtualColumn> vcs,
     Object[] vcValues,
     Deserializer deserializer) {
   if (vcs == null) {
     return vcValues;
   }
   final int count = vcs.size();
   final Object[] values = (vcValues != null) ? vcValues : new Object[count];

   for (int idx = 0; idx < count; idx++) {
     final VirtualColumn column = vcs.get(idx);

     if (column.equals(VirtualColumn.FILENAME)) {
       if (ctx.inputFileChanged()) {
         values[idx] = new Text(ctx.getCurrentInputPath().toString());
       }
     } else if (column.equals(VirtualColumn.BLOCKOFFSET)) {
       storeLongValue(values, idx, ctx.getIoCxt().getCurrentBlockStart());
     } else if (column.equals(VirtualColumn.ROWOFFSET)) {
       storeLongValue(values, idx, ctx.getIoCxt().getCurrentRow());
     } else if (column.equals(VirtualColumn.RAWDATASIZE)) {
       final SerDeStats stats = deserializer.getSerDeStats();
       storeLongValue(values, idx, (stats == null) ? 0L : stats.getRawDataSize());
     } else if (column.equals(VirtualColumn.ROWID)) {
       if (ctx.getIoCxt().getRecordIdentifier() != null) {
         if (values[idx] == null) {
           values[idx] = new Object[RecordIdentifier.Field.values().length];
         }
         RecordIdentifier.StructInfo.toArray(
             ctx.getIoCxt().getRecordIdentifier(), (Object[]) values[idx]);
         // Clear the identifier so a stale value is not picked up on a later call. This should
         // not happen (the IO layer either produces ROW__ID or it does not), but be safe.
         ctx.getIoCxt().setRecordIdentifier(null);
       } else {
         values[idx] = null;
       }
     }
   }
   return values;
 }

 /**
  * Stores {@code current} in the {@code LongWritable} held at {@code slots[idx]}: allocates the
  * writable when the slot is empty, otherwise updates it in place if the value differs.
  */
 private static void storeLongValue(Object[] slots, int idx, long current) {
   final LongWritable holder = (LongWritable) slots[idx];
   if (holder == null) {
     slots[idx] = new LongWritable(current);
   } else if (holder.get() != current) {
     holder.set(current);
   }
 }
Пример #3
0
  /**
   * Fills in the deserializer, converters, partition values and object inspectors of
   * {@code opCtx} from its partition descriptor.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Set table name, partition name and deserializer from the partition descriptor and the
   *       overlaid table/partition properties.
   *   <li>Build the converter from the partition's raw row inspector to {@code tableRowOI}.
   *   <li>If partition columns are declared in the properties, convert the partition spec values
   *       to writables and build the partition struct inspector.
   *   <li>If the operator is a table scan with virtual columns, set up the virtual-column state.
   *   <li>Set the row inspector: {@code tableRowOI} alone, or a union struct of it with the
   *       partition and/or virtual-column inspectors.
   * </ol>
   *
   * @param hconf configuration handed to the partition descriptor to obtain the deserializer
   * @param opCtx context to populate; its {@code partDesc} and {@code op} are read
   * @param tableRowOI table-level row inspector, the conversion target and base row inspector
   * @return the same {@code opCtx} instance, populated
   * @throws Exception a {@code HiveException} when there are more partition keys than partition
   *     key types; anything else thrown by the called helpers propagates
   */
  private MapOpCtx initObjectInspector(
      Configuration hconf, MapOpCtx opCtx, StructObjectInspector tableRowOI) throws Exception {
    final PartitionDesc partDesc = opCtx.partDesc;
    final TableDesc tableDesc = partDesc.getTableDesc();

    // Unpartitioned tables use the table properties as-is; partitioned tables use the union of
    // table and partition properties, with the partition's entries taking precedence.
    final Properties props =
        SerDeUtils.createOverlayedProperties(tableDesc.getProperties(), partDesc.getProperties());
    final Map<String, String> partSpec = partDesc.getPartSpec();

    opCtx.tableName = String.valueOf(props.getProperty("name"));
    opCtx.partName = String.valueOf(partSpec);
    opCtx.deserializer = partDesc.getDeserializer(hconf);

    // Self-describing input formats use the table inspector directly; otherwise ask the
    // partition's deserializer for its own inspector.
    final StructObjectInspector rawRowOI =
        Utilities.isInputFileFormatSelfDescribing(partDesc)
            ? tableRowOI
            : (StructObjectInspector) opCtx.deserializer.getObjectInspector();
    opCtx.partTblObjectInspectorConverter =
        ObjectInspectorConverters.getConverter(rawRowOI, tableRowOI);

    // Partition columns, when declared: collect their names, typed values and inspectors.
    final String partColsProp =
        props.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
    if (partColsProp != null && !partColsProp.isEmpty()) {
      final String[] partKeys = partColsProp.trim().split("/");
      final String partColTypesProp =
          props.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
      final String[] partKeyTypes = partColTypesProp.trim().split(":");

      if (partKeys.length > partKeyTypes.length) {
        throw new HiveException(
            "Internal error : partKeys length, "
                + partKeys.length
                + " greater than partKeyTypes length, "
                + partKeyTypes.length);
      }

      final int keyCount = partKeys.length;
      final List<String> partNames = new ArrayList<String>(keyCount);
      final List<ObjectInspector> partOIs = new ArrayList<ObjectInspector>(keyCount);
      final Object[] partValues = new Object[keyCount];

      for (int k = 0; k < keyCount; k++) {
        final String key = partKeys[k];
        partNames.add(key);
        final ObjectInspector keyOI =
            PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
                TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[k]));

        // Without a partition spec every partition value stays null; otherwise the spec's
        // string value is converted to the key's writable type.
        partValues[k] =
            (partSpec == null)
                ? null
                : ObjectInspectorConverters.getConverter(
                        PrimitiveObjectInspectorFactory.javaStringObjectInspector, keyOI)
                    .convert(partSpec.get(key));
        partOIs.add(keyOI);
      }

      opCtx.rowWithPart = new Object[] {null, partValues};
      opCtx.partObjectInspector =
          ObjectInspectorFactory.getStandardStructObjectInspector(partNames, partOIs);
    }

    // The operator is not necessarily a TableScan, e.g. for mapjoins such as
    //   select /*+MAPJOIN(a)*/ count(*) FROM T1 a JOIN T2 b ON a.key = b.key;
    // where it is a Select. In that case the row inspector needs no virtual-column amendment.
    if (opCtx.op instanceof TableScanOperator) {
      final TableScanDesc scanDesc = ((TableScanOperator) opCtx.op).getConf();
      if (scanDesc != null && scanDesc.hasVirtualCols()) {
        opCtx.vcs = scanDesc.getVirtualCols();
        opCtx.vcValues = new Object[opCtx.vcs.size()];
        opCtx.vcsObjectInspector = VirtualColumn.getVCSObjectInspector(opCtx.vcs);
        // Partitioned rows get a third slot appended to the {row, partValues} pair; copying past
        // the end of the source array pads with null.
        opCtx.rowWithPartAndVC =
            opCtx.isPartitioned() ? Arrays.copyOfRange(opCtx.rowWithPart, 0, 3) : new Object[2];
      }
    }

    if (opCtx.hasVC() || opCtx.isPartitioned()) {
      // Row = table columns, then partition columns (if any), then virtual columns (if any).
      final List<StructObjectInspector> parts = new ArrayList<StructObjectInspector>();
      parts.add(tableRowOI);
      if (opCtx.isPartitioned()) {
        parts.add(opCtx.partObjectInspector);
      }
      if (opCtx.hasVC()) {
        parts.add(opCtx.vcsObjectInspector);
      }
      opCtx.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(parts);
    } else {
      opCtx.rowObjectInspector = tableRowOI;
    }
    return opCtx;
  }