Beispiel #1
0
 /**
  * Pushes one input record through every current map operator context.
  *
  * <p>For each context the record is read into a row and forwarded. Contexts whose
  * {@code forward} call reports {@code false} are tallied, and the tally is handed to
  * {@code rowsForwarded} once all contexts have been visited.
  *
  * @param value the raw record to read and forward
  * @throws HiveException wrapping any exception raised while reading or forwarding the record
  */
 public void process(Writable value) throws HiveException {
   // A single mapper may be fed from several files/partitions. When the input
   // file switches, let the operator tree clean up before this record is handled.
   final ExecMapperContext execContext = getExecContext();
   final boolean inputSwitched = execContext != null && execContext.inputFileChanged();
   if (inputSwitched) {
     cleanUpInputFileChanged();
   }

   int finishedChildren = 0;
   for (MapOpCtx opCtx : currentCtxs) {
     // Stays null until readRow succeeds; the catch block uses that to tell a
     // read failure apart from a failure while forwarding the row.
     Object row = null;
     try {
       row = opCtx.readRow(value, execContext);
       final boolean forwarded = opCtx.forward(row);
       if (!forwarded) {
         finishedChildren++;
       }
     } catch (Exception e) {
       // TODO: policy on deserialization errors
       final String details = toErrorMessage(value, row, opCtx.rowObjectInspector);
       String prefix = "Hive Runtime Error while processing row ";
       if (row == null) {
         // No row was produced: count it as a deserialization error.
         deserialize_error_count.set(deserialize_error_count.get() + 1);
         prefix = "Hive Runtime Error while processing writable ";
       }
       throw new HiveException(prefix + details, e);
     }
   }
   rowsForwarded(finishedChildren, 1);
 }
Beispiel #2
0
 /**
  * Fills in or refreshes the values of the requested virtual columns.
  *
  * <p>Slots in {@code vcValues} are positionally aligned with {@code vcs}. Existing
  * {@code LongWritable} holders and the ROWID struct array are updated in place rather than
  * reallocated, so the returned array can be passed back in on the next call. Virtual columns
  * that are not handled here leave their slot untouched.
  *
  * @param ctx mapper context; supplies the input-file-changed flag, the current input path and
  *     the IO context
  * @param vcs virtual columns to populate; when {@code null}, {@code vcValues} is returned as is
  * @param vcValues values from a previous call, or {@code null} to have a new array of
  *     {@code vcs.size()} slots allocated
  * @param deserializer queried for its {@code SerDeStats} when the raw-data-size column is
  *     requested
  * @return the populated array ({@code vcValues} itself when it was non-null)
  */
 public static Object[] populateVirtualColumnValues(
     ExecMapperContext ctx,
     List<VirtualColumn> vcs,
     Object[] vcValues,
     Deserializer deserializer) {
   if (vcs == null) {
     return vcValues;
   }
   if (vcValues == null) {
     vcValues = new Object[vcs.size()];
   }
   for (int i = 0; i < vcs.size(); i++) {
     VirtualColumn vc = vcs.get(i);
     if (vc.equals(VirtualColumn.FILENAME)) {
       // Only rebuilt when the input file changed; otherwise the existing slot is kept.
       if (ctx.inputFileChanged()) {
         vcValues[i] = new Text(ctx.getCurrentInputPath().toString());
       }
     } else if (vc.equals(VirtualColumn.BLOCKOFFSET)) {
       storeLongVirtualColumn(vcValues, i, ctx.getIoCxt().getCurrentBlockStart());
     } else if (vc.equals(VirtualColumn.ROWOFFSET)) {
       storeLongVirtualColumn(vcValues, i, ctx.getIoCxt().getCurrentRow());
     } else if (vc.equals(VirtualColumn.RAWDATASIZE)) {
       // Falls back to 0 when the deserializer exposes no statistics.
       long rawDataSize = 0L;
       SerDeStats stats = deserializer.getSerDeStats();
       if (stats != null) {
         rawDataSize = stats.getRawDataSize();
       }
       storeLongVirtualColumn(vcValues, i, rawDataSize);
     } else if (vc.equals(VirtualColumn.ROWID)) {
       if (ctx.getIoCxt().getRecordIdentifier() == null) {
         vcValues[i] = null;
       } else {
         if (vcValues[i] == null) {
           vcValues[i] = new Object[RecordIdentifier.Field.values().length];
         }
         RecordIdentifier.StructInfo.toArray(
             ctx.getIoCxt().getRecordIdentifier(), (Object[]) vcValues[i]);
         // Clear the identifier so a stale value is not accidentally reused. This should not
         // happen, since the IO layer either knows how to produce ROW__ID or not, but to be safe.
         ctx.getIoCxt().setRecordIdentifier(null);
       }
     }
   }
   return vcValues;
 }

 /**
  * Stores {@code value} in slot {@code index}: allocates a {@code LongWritable} when the slot is
  * empty, otherwise updates the existing holder in place if its value differs.
  */
 private static void storeLongVirtualColumn(Object[] vcValues, int index, long value) {
   LongWritable holder = (LongWritable) vcValues[index];
   if (holder == null) {
     vcValues[index] = new LongWritable(value);
   } else if (value != holder.get()) {
     holder.set(value);
   }
 }