/**
 * Sequentially compares the group fields of two tuples and returns the index of the first field
 * in which they differ. When a custom comparator is registered for a field it is used (through
 * its object-level {@code compare}); otherwise the two values are compared with a null-safe
 * {@code equals}.
 *
 * <p>Important: the contract of this method is that the tuples always differ somewhere between
 * {@code minFieldIndex} and {@code maxFieldIndex} (both inclusive). If they compare equal on the
 * whole range an exception is thrown.
 *
 * @param tuple1 first tuple to compare
 * @param tuple2 second tuple to compare
 * @param minFieldIndex first group-field index to inspect (inclusive)
 * @param maxFieldIndex last group-field index to inspect (inclusive)
 * @return the index of the first field whose values differ
 * @throws RuntimeException if every field in the range compares equal
 */
private int indexMismatch(ITuple tuple1, ITuple tuple2, int minFieldIndex, int maxFieldIndex) {
  int schemaId1 = tupleMRConfig.getSchemaIdByName(tuple1.getSchema().getName());
  int schemaId2 = tupleMRConfig.getSchemaIdByName(tuple2.getSchema().getName());
  // Each schema may place the group fields at different positions, so group indexes are
  // translated to per-tuple positions before reading the values.
  int[] translationTuple1 = serInfo.getGroupSchemaIndexTranslation(schemaId1);
  int[] translationTuple2 = serInfo.getGroupSchemaIndexTranslation(schemaId2);
  for (int i = minFieldIndex; i <= maxFieldIndex; i++) {
    Object obj1 = tuple1.get(translationTuple1[i]);
    Object obj2 = tuple2.get(translationTuple2[i]);
    @SuppressWarnings("unchecked")
    RawComparator<Object> customComparator = (RawComparator<Object>) customComparators[i];
    boolean differ;
    if (customComparator != null) {
      differ = customComparator.compare(obj1, obj2) != 0;
    } else if (obj1 == null) {
      // Null-safe path: a null value only matches another null. Calling obj1.equals(obj2)
      // here would throw a NullPointerException.
      differ = obj2 != null;
    } else {
      differ = !obj1.equals(obj2);
    }
    if (differ) {
      return i;
    }
  }
  throw new RuntimeException(
      "Illegal state.The tuples "
          + tuple1
          + " and "
          + tuple2
          + " compare the same between indexes "
          + minFieldIndex
          + " and "
          + maxFieldIndex);
}
@Override public final void reduce(DatumWrapper<ITuple> key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException { try { Iterator<NullWritable> iterator = values.iterator(); tupleIterator.setIterator(iterator); ITuple currentTuple = key.datum(); ITuple previousKey = key.previousDatum(); int indexMismatch; if (firstRun) { indexMismatch = minDepth; firstRun = false; } else { indexMismatch = indexMismatch(previousKey, currentTuple, 0, maxDepth); if (indexMismatch < minDepth) { indexMismatch = minDepth; } for (int i = maxDepth; i >= indexMismatch; i--) { handler.onCloseGroup( i, groupSchema.getField(i).getName(), previousKey, this.context, collector); } } for (int i = indexMismatch; i <= maxDepth; i++) { handler.onOpenGroup( i, groupSchema.getField(i).getName(), currentTuple, this.context, collector); } // We set a view over the group fields to the method. if (isMultipleSources) { int schemaId = tupleMRConfig.getSchemaIdByName(currentTuple.getSchema().getName()); int[] indexTranslation = serInfo.getGroupSchemaIndexTranslation(schemaId); groupTuple.setContained(currentTuple, indexTranslation); } else { groupTuple.setContained(currentTuple); } handler.reduce(groupTuple, tupleIterator, this.context, collector); // This loop consumes the remaining elements that reduce didn't consume // The goal of this is to correctly set the last element in the next // onCloseGroup() call while (iterator.hasNext()) { iterator.next(); } } catch (TupleMRException e) { throw new RuntimeException(e); } }
/**
 * Fills the fields of {@code tuple} whose indexes lie in the inclusive range
 * {@code [minIndex, maxIndex]}.
 *
 * <p>For INT, LONG, BOOLEAN, DOUBLE and FLOAT fields a random value is stored when
 * {@code isRandom} is true, and zero / {@code false} otherwise. STRING, ENUM and OBJECT fields
 * are delegated to {@code fillString}, {@code fillEnum} and {@code fillObject} respectively.
 *
 * @param isRandom whether to generate random values instead of fixed defaults
 * @param tuple the tuple to fill
 * @param minIndex first field index to fill (inclusive)
 * @param maxIndex last field index to fill (inclusive)
 * @throws RuntimeException wrapping any exception raised while filling, including the
 *     {@code IllegalArgumentException} produced for an unsupported field type
 */
protected static void fillTuple(boolean isRandom, ITuple tuple, int minIndex, int maxIndex) {
  try {
    for (int pos = minIndex; pos <= maxIndex; pos++) {
      Field field = tuple.getSchema().getField(pos);
      switch (field.getType()) {
        case INT:
          if (isRandom) {
            tuple.set(pos, random.nextInt());
          } else {
            tuple.set(pos, 0);
          }
          break;
        case LONG:
          if (isRandom) {
            tuple.set(pos, random.nextLong());
          } else {
            // Long literal so the stored default is a long, as in the random branch.
            tuple.set(pos, 0L);
          }
          break;
        case BOOLEAN:
          if (isRandom) {
            tuple.set(pos, random.nextBoolean());
          } else {
            tuple.set(pos, false);
          }
          break;
        case DOUBLE:
          if (isRandom) {
            tuple.set(pos, random.nextDouble());
          } else {
            tuple.set(pos, 0.0);
          }
          break;
        case FLOAT:
          if (isRandom) {
            tuple.set(pos, random.nextFloat());
          } else {
            tuple.set(pos, 0f);
          }
          break;
        case STRING:
          fillString(isRandom, tuple, pos);
          break;
        case ENUM:
          fillEnum(isRandom, field, tuple, pos);
          break;
        case OBJECT:
          fillObject(isRandom, tuple, field, pos);
          break;
        default:
          throw new IllegalArgumentException("Not supported type " + field.getType());
      }
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
/**
 * Fills every field of {@code tuple}, from index 0 up to the last field of its schema, by
 * delegating to the ranged {@code fillTuple} overload.
 *
 * @param random whether to generate random values instead of fixed defaults
 * @param tuple the tuple to fill
 */
protected static void fillTuple(boolean random, ITuple tuple) {
  int lastIndex = tuple.getSchema().getFields().size() - 1;
  fillTuple(random, tuple, 0, lastIndex);
}