@Override public void close() { // No row was processed if (oc == null) { LOG.trace("Close called without any rows processed"); } try { if (groupKey != null) { // If a operator wants to do some work at the end of a group LOG.trace("End Group"); reducer.endGroup(); } if (isLogInfoEnabled) { logCloseInfo(); } reducer.close(abort); if (localWork != null) { for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) { dummyOp.close(abort); } } ReportStats rps = new ReportStats(rp, jc); reducer.preorderMap(rps); } catch (Exception e) { if (!abort) { // signal new failure to map-reduce LOG.error("Hit error while closing operators - failing tree"); throw new RuntimeException( "Hive Runtime Error while closing operators: " + e.getMessage(), e); } } finally { MapredContext.close(); Utilities.clearWorkMap(); } }
@Override @SuppressWarnings("unchecked") public void init(JobConf job, OutputCollector output, Reporter reporter) throws Exception { perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS); super.init(job, output, reporter); rowObjectInspector = new ObjectInspector[Byte.MAX_VALUE]; ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE]; ObjectInspector keyObjectInspector; ReduceWork gWork = Utilities.getReduceWork(job); reducer = gWork.getReducer(); vectorized = gWork.getVectorMode(); reducer.setParentOperators(null); // clear out any parents as reducer is the // root isTagged = gWork.getNeedsTagging(); try { keyTableDesc = gWork.getKeyDesc(); inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null); SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null); keyObjectInspector = inputKeyDeserializer.getObjectInspector(); valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()]; if (vectorized) { final int maxTags = gWork.getTagToValueDesc().size(); keyStructInspector = (StructObjectInspector) keyObjectInspector; batches = new VectorizedRowBatch[maxTags]; valueStructInspectors = new StructObjectInspector[maxTags]; valueStringWriters = new List[maxTags]; keysColumnOffset = keyStructInspector.getAllStructFieldRefs().size(); buffer = new DataOutputBuffer(); } for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) { // We should initialize the SerDe with the TypeInfo when available. valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag); inputValueDeserializer[tag] = ReflectionUtils.newInstance(valueTableDesc[tag].getDeserializerClass(), null); SerDeUtils.initializeSerDe( inputValueDeserializer[tag], null, valueTableDesc[tag].getProperties(), null); valueObjectInspector[tag] = inputValueDeserializer[tag].getObjectInspector(); ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>(); if (vectorized) { /* vectorization only works with struct object inspectors */ valueStructInspectors[tag] = (StructObjectInspector) valueObjectInspector[tag]; ObjectPair<VectorizedRowBatch, StandardStructObjectInspector> pair = VectorizedBatchUtil.constructVectorizedRowBatch( keyStructInspector, valueStructInspectors[tag], gWork.getVectorScratchColumnTypeMap()); batches[tag] = pair.getFirst(); final int totalColumns = keysColumnOffset + valueStructInspectors[tag].getAllStructFieldRefs().size(); valueStringWriters[tag] = new ArrayList<VectorExpressionWriter>(totalColumns); valueStringWriters[tag].addAll( Arrays.asList( VectorExpressionWriterFactory.genVectorStructExpressionWritables( keyStructInspector))); valueStringWriters[tag].addAll( Arrays.asList( VectorExpressionWriterFactory.genVectorStructExpressionWritables( valueStructInspectors[tag]))); rowObjectInspector[tag] = pair.getSecond(); } else { ois.add(keyObjectInspector); ois.add(valueObjectInspector[tag]); // reducer.setGroupKeyObjectInspector(keyObjectInspector); rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector( Utilities.reduceFieldNameList, ois); } } } catch (Exception e) { throw new RuntimeException(e); } ExecMapperContext execContext = new ExecMapperContext(job); localWork = gWork.getMapRedLocalWork(); execContext.setJc(jc); execContext.setLocalWork(localWork); reducer.passExecContext(execContext); reducer.setReporter(rp); OperatorUtils.setChildrenCollector( Arrays.<Operator<? extends OperatorDesc>>asList(reducer), output); // initialize reduce operator tree try { LOG.info(reducer.dump(0)); reducer.initialize(jc, rowObjectInspector); if (localWork != null) { for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) { dummyOp.setExecContext(execContext); dummyOp.initialize(jc, null); } } } catch (Throwable e) { abort = true; if (e instanceof OutOfMemoryError) { // Don't create a new object if we are already out of memory throw (OutOfMemoryError) e; } else { throw new RuntimeException("Reduce operator initialization failed", e); } } perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS); }