public void process(Writable value) throws HiveException { // A mapper can span multiple files/partitions. // The serializers need to be reset if the input file changed ExecMapperContext context = getExecContext(); if (context != null && context.inputFileChanged()) { // The child operators cleanup if input file has changed cleanUpInputFileChanged(); } int childrenDone = 0; for (MapOpCtx current : currentCtxs) { Object row = null; try { row = current.readRow(value, context); if (!current.forward(row)) { childrenDone++; } } catch (Exception e) { // TODO: policy on deserialization errors String message = toErrorMessage(value, row, current.rowObjectInspector); if (row == null) { deserialize_error_count.set(deserialize_error_count.get() + 1); throw new HiveException("Hive Runtime Error while processing writable " + message, e); } throw new HiveException("Hive Runtime Error while processing row " + message, e); } } rowsForwarded(childrenDone, 1); }
public static Object[] populateVirtualColumnValues( ExecMapperContext ctx, List<VirtualColumn> vcs, Object[] vcValues, Deserializer deserializer) { if (vcs == null) { return vcValues; } if (vcValues == null) { vcValues = new Object[vcs.size()]; } for (int i = 0; i < vcs.size(); i++) { VirtualColumn vc = vcs.get(i); if (vc.equals(VirtualColumn.FILENAME)) { if (ctx.inputFileChanged()) { vcValues[i] = new Text(ctx.getCurrentInputPath().toString()); } } else if (vc.equals(VirtualColumn.BLOCKOFFSET)) { long current = ctx.getIoCxt().getCurrentBlockStart(); LongWritable old = (LongWritable) vcValues[i]; if (old == null) { old = new LongWritable(current); vcValues[i] = old; continue; } if (current != old.get()) { old.set(current); } } else if (vc.equals(VirtualColumn.ROWOFFSET)) { long current = ctx.getIoCxt().getCurrentRow(); LongWritable old = (LongWritable) vcValues[i]; if (old == null) { old = new LongWritable(current); vcValues[i] = old; continue; } if (current != old.get()) { old.set(current); } } else if (vc.equals(VirtualColumn.RAWDATASIZE)) { long current = 0L; SerDeStats stats = deserializer.getSerDeStats(); if (stats != null) { current = stats.getRawDataSize(); } LongWritable old = (LongWritable) vcValues[i]; if (old == null) { old = new LongWritable(current); vcValues[i] = old; continue; } if (current != old.get()) { old.set(current); } } else if (vc.equals(VirtualColumn.ROWID)) { if (ctx.getIoCxt().getRecordIdentifier() == null) { vcValues[i] = null; } else { if (vcValues[i] == null) { vcValues[i] = new Object[RecordIdentifier.Field.values().length]; } RecordIdentifier.StructInfo.toArray( ctx.getIoCxt().getRecordIdentifier(), (Object[]) vcValues[i]); ctx.getIoCxt() .setRecordIdentifier(null); // so we don't accidentally cache the value; shouldn't // happen since IO layer either knows how to produce ROW__ID or not - but to be safe } } } return vcValues; }