/**
 * Walks the group fields of two tuples in order and returns the index of the first field at
 * which they differ.
 *
 * <p>For each index, if a custom comparator is registered in {@code customComparators} it is
 * used (a non-zero {@code compare} result means "different"); otherwise the two field values are
 * compared with a null-safe {@code equals}.
 *
 * <p>Important: the contract of this method is that the tuples always differ somewhere between
 * {@code minFieldIndex} and {@code maxFieldIndex}. If they are equal over the whole range an
 * exception is thrown.
 *
 * @param tuple1 first tuple to compare
 * @param tuple2 second tuple to compare
 * @param minFieldIndex first group-schema index to inspect (inclusive)
 * @param maxFieldIndex last group-schema index to inspect (inclusive)
 * @return the first group-schema index in the range at which the tuples differ
 * @throws IllegalStateException if the tuples compare equal over the whole range
 */
private int indexMismatch(ITuple tuple1, ITuple tuple2, int minFieldIndex, int maxFieldIndex) {
  int schemaId1 = tupleMRConfig.getSchemaIdByName(tuple1.getSchema().getName());
  int schemaId2 = tupleMRConfig.getSchemaIdByName(tuple2.getSchema().getName());
  // Each tuple may come from a different schema, so group-schema indexes are translated
  // to the positions of the corresponding fields inside each tuple.
  int[] translationTuple1 = serInfo.getGroupSchemaIndexTranslation(schemaId1);
  int[] translationTuple2 = serInfo.getGroupSchemaIndexTranslation(schemaId2);

  for (int i = minFieldIndex; i <= maxFieldIndex; i++) {
    Object obj1 = tuple1.get(translationTuple1[i]);
    Object obj2 = tuple2.get(translationTuple2[i]);
    @SuppressWarnings("unchecked")
    RawComparator<Object> customComparator = (RawComparator<Object>) customComparators[i];

    boolean differs;
    if (customComparator != null) {
      differs = customComparator.compare(obj1, obj2) != 0;
    } else if (obj1 == null) {
      // Null-safe: a null field only matches another null field.
      differs = obj2 != null;
    } else {
      differs = !obj1.equals(obj2);
    }

    if (differs) {
      return i;
    }
  }

  throw new IllegalStateException(
      "Illegal state.The tuples "
          + tuple1
          + " and "
          + tuple2
          + " compare the same between indexes "
          + minFieldIndex
          + " and "
          + maxFieldIndex);
}
@Override public final void reduce(DatumWrapper<ITuple> key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException { try { Iterator<NullWritable> iterator = values.iterator(); tupleIterator.setIterator(iterator); ITuple currentTuple = key.datum(); ITuple previousKey = key.previousDatum(); int indexMismatch; if (firstRun) { indexMismatch = minDepth; firstRun = false; } else { indexMismatch = indexMismatch(previousKey, currentTuple, 0, maxDepth); if (indexMismatch < minDepth) { indexMismatch = minDepth; } for (int i = maxDepth; i >= indexMismatch; i--) { handler.onCloseGroup( i, groupSchema.getField(i).getName(), previousKey, this.context, collector); } } for (int i = indexMismatch; i <= maxDepth; i++) { handler.onOpenGroup( i, groupSchema.getField(i).getName(), currentTuple, this.context, collector); } // We set a view over the group fields to the method. if (isMultipleSources) { int schemaId = tupleMRConfig.getSchemaIdByName(currentTuple.getSchema().getName()); int[] indexTranslation = serInfo.getGroupSchemaIndexTranslation(schemaId); groupTuple.setContained(currentTuple, indexTranslation); } else { groupTuple.setContained(currentTuple); } handler.reduce(groupTuple, tupleIterator, this.context, collector); // This loop consumes the remaining elements that reduce didn't consume // The goal of this is to correctly set the last element in the next // onCloseGroup() call while (iterator.hasNext()) { iterator.next(); } } catch (TupleMRException e) { throw new RuntimeException(e); } }