static void initializeDeserializer(
     Deserializer deserializer, Configuration conf, HCatTableInfo info, HCatSchema schema)
     throws SerDeException {
   Properties props = getSerdeProperties(info, schema);
   LOG.info("Initializing " + deserializer.getClass().getName() + " with properties " + props);
   deserializer.initialize(conf, props);
 }
Exemple #2
0
 @SuppressWarnings("deprecation")
 private static void initializeDeserializer(Deserializer deserializer, Properties schema) {
   try {
     deserializer.initialize(new Configuration(false), schema);
   } catch (SerDeException e) {
     throw new RuntimeException(
         "error initializing deserializer: " + deserializer.getClass().getName());
   }
 }
Exemple #3
0
 /*
  * This is the same as the setChildren method below but for empty tables.
  * It takes care of the following:
  * 1. Create the right object inspector.
  * 2. Set up the childrenOpToOI with the object inspector.
  * So as to ensure that the initialization happens correctly.
  */
 public void initEmptyInputChildren(List<Operator<?>> children, Configuration hconf)
     throws SerDeException, Exception {
   setChildOperators(children);
   for (Operator<?> child : children) {
     TableScanOperator tsOp = (TableScanOperator) child;
     StructObjectInspector soi = null;
     PartitionDesc partDesc = conf.getAliasToPartnInfo().get(tsOp.getConf().getAlias());
     Deserializer serde = partDesc.getTableDesc().getDeserializer();
     partDesc.setProperties(partDesc.getProperties());
     MapOpCtx opCtx = new MapOpCtx(tsOp.getConf().getAlias(), child, partDesc);
     StructObjectInspector tableRowOI = (StructObjectInspector) serde.getObjectInspector();
     initObjectInspector(hconf, opCtx, tableRowOI);
     soi = opCtx.rowObjectInspector;
     child.getParentOperators().add(this);
     childrenOpToOI.put(child, soi);
   }
 }
Exemple #4
0
 public static StructObjectInspector getTableObjectInspector(
     @SuppressWarnings("deprecation") Deserializer deserializer) {
   try {
     ObjectInspector inspector = deserializer.getObjectInspector();
     checkArgument(
         inspector.getCategory() == Category.STRUCT,
         "expected STRUCT: %s",
         inspector.getCategory());
     return (StructObjectInspector) inspector;
   } catch (SerDeException e) {
     throw Throwables.propagate(e);
   }
 }
Exemple #5
0
 private Object readRow(Writable value, ExecMapperContext context) throws SerDeException {
   Object deserialized = deserializer.deserialize(value);
   Object row = partTblObjectInspectorConverter.convert(deserialized);
   if (hasVC()) {
     rowWithPartAndVC[0] = row;
     if (context != null) {
       populateVirtualColumnValues(context, vcs, vcValues, deserializer);
     }
     int vcPos = isPartitioned() ? 2 : 1;
     rowWithPartAndVC[vcPos] = vcValues;
     return rowWithPartAndVC;
   } else if (isPartitioned()) {
     rowWithPart[0] = row;
     return rowWithPart;
   }
   return row;
 }
  @Override
  public boolean advanceNextPosition() {
    try {
      if (closed || !recordReader.next(key, value)) {
        close();
        return false;
      }

      // reset loaded flags
      // partition keys are already loaded, but everything else is not
      System.arraycopy(isPartitionColumn, 0, loaded, 0, isPartitionColumn.length);

      // decode value
      rowData = deserializer.deserialize(value);

      return true;
    } catch (IOException | SerDeException | RuntimeException e) {
      closeWithSuppression(e);
      throw new PrestoException(HIVE_CURSOR_ERROR.toErrorCode(), e);
    }
  }
Exemple #7
0
  /**
   * Traverse all the partitions for a table, and get the OI for the table. Note that a conversion
   * is required if any of the partition OI is different from the table OI. For eg. if the query
   * references table T (partitions P1, P2), and P1's schema is same as T, whereas P2's scheme is
   * different from T, conversion might be needed for both P1 and P2, since SettableOI might be
   * needed for T
   */
  private Map<TableDesc, StructObjectInspector> getConvertedOI(Configuration hconf)
      throws HiveException {
    Map<TableDesc, StructObjectInspector> tableDescOI =
        new HashMap<TableDesc, StructObjectInspector>();
    Set<TableDesc> identityConverterTableDesc = new HashSet<TableDesc>();
    try {
      Map<ObjectInspector, Boolean> oiSettableProperties = new HashMap<ObjectInspector, Boolean>();

      for (String onefile : conf.getPathToAliases().keySet()) {
        PartitionDesc pd = conf.getPathToPartitionInfo().get(onefile);
        TableDesc tableDesc = pd.getTableDesc();
        Deserializer partDeserializer = pd.getDeserializer(hconf);

        StructObjectInspector partRawRowObjectInspector;
        if (Utilities.isInputFileFormatSelfDescribing(pd)) {
          Deserializer tblDeserializer = tableDesc.getDeserializer(hconf);
          partRawRowObjectInspector = (StructObjectInspector) tblDeserializer.getObjectInspector();
        } else {
          partRawRowObjectInspector = (StructObjectInspector) partDeserializer.getObjectInspector();
        }

        StructObjectInspector tblRawRowObjectInspector = tableDescOI.get(tableDesc);
        if ((tblRawRowObjectInspector == null)
            || (identityConverterTableDesc.contains(tableDesc))) {
          Deserializer tblDeserializer = tableDesc.getDeserializer(hconf);
          tblRawRowObjectInspector =
              (StructObjectInspector)
                  ObjectInspectorConverters.getConvertedOI(
                      partRawRowObjectInspector,
                      tblDeserializer.getObjectInspector(),
                      oiSettableProperties);

          if (identityConverterTableDesc.contains(tableDesc)) {
            if (!partRawRowObjectInspector.equals(tblRawRowObjectInspector)) {
              identityConverterTableDesc.remove(tableDesc);
            }
          } else if (partRawRowObjectInspector.equals(tblRawRowObjectInspector)) {
            identityConverterTableDesc.add(tableDesc);
          }

          tableDescOI.put(tableDesc, tblRawRowObjectInspector);
        }
      }
    } catch (Exception e) {
      throw new HiveException(e);
    }
    return tableDescOI;
  }
  @Override
  public <E> void processRow(Object key, Iterator<E> values) throws IOException {
    if (reducer.getDone()) {
      return;
    }

    try {
      BytesWritable keyWritable = (BytesWritable) key;
      byte tag = 0;
      if (isTagged) {
        // remove the tag from key coming out of reducer
        // and store it in separate variable.
        int size = keyWritable.getSize() - 1;
        tag = keyWritable.get()[size];
        keyWritable.setSize(size);
      }

      if (!keyWritable.equals(groupKey)) {
        // If a operator wants to do some work at the beginning of a group
        if (groupKey == null) { // the first group
          groupKey = new BytesWritable();
        } else {
          // If a operator wants to do some work at the end of a group
          LOG.trace("End Group");
          reducer.endGroup();
        }

        try {
          keyObject = inputKeyDeserializer.deserialize(keyWritable);
        } catch (Exception e) {
          throw new HiveException(
              "Hive Runtime Error: Unable to deserialize reduce input key from "
                  + Utilities.formatBinaryString(keyWritable.get(), 0, keyWritable.getSize())
                  + " with properties "
                  + keyTableDesc.getProperties(),
              e);
        }

        groupKey.set(keyWritable.get(), 0, keyWritable.getSize());
        LOG.trace("Start Group");
        reducer.setGroupKeyObject(keyObject);
        reducer.startGroup();
      }
      /* this.keyObject passed via reference */
      if (vectorized) {
        processVectors(values, tag);
      } else {
        processKeyValues(values, tag);
      }

    } catch (Throwable e) {
      abort = true;
      Utilities.setReduceWork(jc, null);
      if (e instanceof OutOfMemoryError) {
        // Don't create a new object if we are already out of memory
        throw (OutOfMemoryError) e;
      } else {
        String msg = "Fatal error: " + e;
        LOG.fatal(msg, e);
        throw new RuntimeException(e);
      }
    }
  }
  @Override
  @SuppressWarnings("unchecked")
  public void init(JobConf job, OutputCollector output, Reporter reporter) throws Exception {
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
    super.init(job, output, reporter);

    rowObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
    ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
    ObjectInspector keyObjectInspector;

    ReduceWork gWork = Utilities.getReduceWork(job);

    reducer = gWork.getReducer();
    vectorized = gWork.getVectorMode();
    reducer.setParentOperators(null); // clear out any parents as reducer is the
    // root
    isTagged = gWork.getNeedsTagging();
    try {
      keyTableDesc = gWork.getKeyDesc();
      inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
      SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
      keyObjectInspector = inputKeyDeserializer.getObjectInspector();
      valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()];

      if (vectorized) {
        final int maxTags = gWork.getTagToValueDesc().size();
        keyStructInspector = (StructObjectInspector) keyObjectInspector;
        batches = new VectorizedRowBatch[maxTags];
        valueStructInspectors = new StructObjectInspector[maxTags];
        valueStringWriters = new List[maxTags];
        keysColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
        buffer = new DataOutputBuffer();
      }

      for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
        // We should initialize the SerDe with the TypeInfo when available.
        valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
        inputValueDeserializer[tag] =
            ReflectionUtils.newInstance(valueTableDesc[tag].getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(
            inputValueDeserializer[tag], null, valueTableDesc[tag].getProperties(), null);
        valueObjectInspector[tag] = inputValueDeserializer[tag].getObjectInspector();

        ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();

        if (vectorized) {
          /* vectorization only works with struct object inspectors */
          valueStructInspectors[tag] = (StructObjectInspector) valueObjectInspector[tag];

          ObjectPair<VectorizedRowBatch, StandardStructObjectInspector> pair =
              VectorizedBatchUtil.constructVectorizedRowBatch(
                  keyStructInspector,
                  valueStructInspectors[tag],
                  gWork.getVectorScratchColumnTypeMap());
          batches[tag] = pair.getFirst();
          final int totalColumns =
              keysColumnOffset + valueStructInspectors[tag].getAllStructFieldRefs().size();
          valueStringWriters[tag] = new ArrayList<VectorExpressionWriter>(totalColumns);
          valueStringWriters[tag].addAll(
              Arrays.asList(
                  VectorExpressionWriterFactory.genVectorStructExpressionWritables(
                      keyStructInspector)));
          valueStringWriters[tag].addAll(
              Arrays.asList(
                  VectorExpressionWriterFactory.genVectorStructExpressionWritables(
                      valueStructInspectors[tag])));

          rowObjectInspector[tag] = pair.getSecond();
        } else {
          ois.add(keyObjectInspector);
          ois.add(valueObjectInspector[tag]);
          // reducer.setGroupKeyObjectInspector(keyObjectInspector);
          rowObjectInspector[tag] =
              ObjectInspectorFactory.getStandardStructObjectInspector(
                  Utilities.reduceFieldNameList, ois);
        }
      }
    } catch (Exception e) {
      throw new RuntimeException(e);
    }

    ExecMapperContext execContext = new ExecMapperContext(job);
    localWork = gWork.getMapRedLocalWork();
    execContext.setJc(jc);
    execContext.setLocalWork(localWork);
    reducer.passExecContext(execContext);

    reducer.setReporter(rp);
    OperatorUtils.setChildrenCollector(
        Arrays.<Operator<? extends OperatorDesc>>asList(reducer), output);

    // initialize reduce operator tree
    try {
      LOG.info(reducer.dump(0));
      reducer.initialize(jc, rowObjectInspector);

      if (localWork != null) {
        for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) {
          dummyOp.setExecContext(execContext);
          dummyOp.initialize(jc, null);
        }
      }

    } catch (Throwable e) {
      abort = true;
      if (e instanceof OutOfMemoryError) {
        // Don't create a new object if we are already out of memory
        throw (OutOfMemoryError) e;
      } else {
        throw new RuntimeException("Reduce operator initialization failed", e);
      }
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
  }
Exemple #10
0
 public static Object[] populateVirtualColumnValues(
     ExecMapperContext ctx,
     List<VirtualColumn> vcs,
     Object[] vcValues,
     Deserializer deserializer) {
   if (vcs == null) {
     return vcValues;
   }
   if (vcValues == null) {
     vcValues = new Object[vcs.size()];
   }
   for (int i = 0; i < vcs.size(); i++) {
     VirtualColumn vc = vcs.get(i);
     if (vc.equals(VirtualColumn.FILENAME)) {
       if (ctx.inputFileChanged()) {
         vcValues[i] = new Text(ctx.getCurrentInputPath().toString());
       }
     } else if (vc.equals(VirtualColumn.BLOCKOFFSET)) {
       long current = ctx.getIoCxt().getCurrentBlockStart();
       LongWritable old = (LongWritable) vcValues[i];
       if (old == null) {
         old = new LongWritable(current);
         vcValues[i] = old;
         continue;
       }
       if (current != old.get()) {
         old.set(current);
       }
     } else if (vc.equals(VirtualColumn.ROWOFFSET)) {
       long current = ctx.getIoCxt().getCurrentRow();
       LongWritable old = (LongWritable) vcValues[i];
       if (old == null) {
         old = new LongWritable(current);
         vcValues[i] = old;
         continue;
       }
       if (current != old.get()) {
         old.set(current);
       }
     } else if (vc.equals(VirtualColumn.RAWDATASIZE)) {
       long current = 0L;
       SerDeStats stats = deserializer.getSerDeStats();
       if (stats != null) {
         current = stats.getRawDataSize();
       }
       LongWritable old = (LongWritable) vcValues[i];
       if (old == null) {
         old = new LongWritable(current);
         vcValues[i] = old;
         continue;
       }
       if (current != old.get()) {
         old.set(current);
       }
     } else if (vc.equals(VirtualColumn.ROWID)) {
       if (ctx.getIoCxt().getRecordIdentifier() == null) {
         vcValues[i] = null;
       } else {
         if (vcValues[i] == null) {
           vcValues[i] = new Object[RecordIdentifier.Field.values().length];
         }
         RecordIdentifier.StructInfo.toArray(
             ctx.getIoCxt().getRecordIdentifier(), (Object[]) vcValues[i]);
         ctx.getIoCxt()
             .setRecordIdentifier(null); // so we don't accidentally cache the value; shouldn't
         // happen since IO layer either knows how to produce ROW__ID or not - but to be safe
       }
     }
   }
   return vcValues;
 }