예제 #1
0
  /**
   * Walks the group fields of two tuples in order and returns the index of the first field on
   * which they differ.
   *
   * <p>Each tuple's schema name is resolved to a schema id, and the per-schema translation table
   * maps the group-field index to the tuple's own field position. For every index in the inclusive
   * range {@code [minFieldIndex, maxFieldIndex]} the two values are compared with the custom
   * {@link RawComparator} registered for that index when there is one (its object-level
   * {@code compare} is used), otherwise with {@code equals}. The {@code equals} fallback is
   * null-safe: two nulls are equal, and a null against a non-null value is a mismatch.
   *
   * <p>Important: the contract of this method is that the tuples always differ somewhere between
   * {@code minFieldIndex} and {@code maxFieldIndex}. If they compare equal on every field in that
   * range, an exception is thrown.
   *
   * @param tuple1 first tuple to compare
   * @param tuple2 second tuple to compare
   * @param minFieldIndex first group-field index to inspect (inclusive)
   * @param maxFieldIndex last group-field index to inspect (inclusive)
   * @return the lowest group-field index in the range at which the tuples differ
   * @throws RuntimeException if no field in the range differs
   */
  private int indexMismatch(ITuple tuple1, ITuple tuple2, int minFieldIndex, int maxFieldIndex) {
    int schemaId1 = tupleMRConfig.getSchemaIdByName(tuple1.getSchema().getName());
    int schemaId2 = tupleMRConfig.getSchemaIdByName(tuple2.getSchema().getName());
    int[] translationTuple1 = serInfo.getGroupSchemaIndexTranslation(schemaId1);
    int[] translationTuple2 = serInfo.getGroupSchemaIndexTranslation(schemaId2);

    for (int i = minFieldIndex; i <= maxFieldIndex; i++) {
      Object obj1 = tuple1.get(translationTuple1[i]);
      Object obj2 = tuple2.get(translationTuple2[i]);
      @SuppressWarnings("unchecked")
      RawComparator<Object> customComparator = (RawComparator<Object>) customComparators[i];

      boolean differs;
      if (customComparator != null) {
        // A custom comparator defines equality for this field; null handling is left to it.
        differs = customComparator.compare(obj1, obj2) != 0;
      } else {
        // Null-safe equality: avoids a NullPointerException when the first value is null and
        // treats null-vs-value as a mismatch regardless of which side holds the null.
        differs = (obj1 == null) ? (obj2 != null) : !obj1.equals(obj2);
      }

      if (differs) {
        return i;
      }
    }
    throw new RuntimeException(
        "Illegal state.The tuples "
            + tuple1
            + " and "
            + tuple2
            + " compare the same between indexes "
            + minFieldIndex
            + " and "
            + maxFieldIndex);
  }
예제 #2
0
  /**
   * Processes one reduce call: closes the groups that ended with the previous key, opens the
   * groups that start with the current key, delegates to the handler and finally drains whatever
   * values the handler left unread.
   *
   * <p>Note that the handler is always given {@code this.context} (the instance field), not the
   * {@code context} argument of this method.
   *
   * @param key wrapper giving access to the current tuple and the previous one
   * @param values values associated with the key; wrapped by {@code tupleIterator}
   * @param context context argument received for this call (not used directly here)
   * @throws RuntimeException wrapping any {@code TupleMRException} raised while handling the group
   */
  @Override
  public final void reduce(DatumWrapper<ITuple> key, Iterable<NullWritable> values, Context context)
      throws IOException, InterruptedException {

    try {
      Iterator<NullWritable> pending = values.iterator();
      tupleIterator.setIterator(pending);
      ITuple current = key.datum();
      ITuple previous = key.previousDatum();

      // Shallowest group level that has to be (re)opened for the current tuple.
      int startLevel;
      if (firstRun) {
        // Very first key: nothing to close, every level from minDepth is opened.
        firstRun = false;
        startLevel = minDepth;
      } else {
        // Never go shallower than minDepth, even if the keys differ on an earlier field.
        startLevel = Math.max(minDepth, indexMismatch(previous, current, 0, maxDepth));

        // Close the finished groups from the deepest level up to the first one that changed.
        for (int level = maxDepth; level >= startLevel; level--) {
          String fieldName = groupSchema.getField(level).getName();
          handler.onCloseGroup(level, fieldName, previous, this.context, collector);
        }
      }

      // Open the new groups from the first changed level down to the deepest one.
      for (int level = startLevel; level <= maxDepth; level++) {
        String fieldName = groupSchema.getField(level).getName();
        handler.onOpenGroup(level, fieldName, current, this.context, collector);
      }

      // Expose the current tuple to the handler through the group-fields view.
      if (!isMultipleSources) {
        groupTuple.setContained(current);
      } else {
        int schemaId = tupleMRConfig.getSchemaIdByName(current.getSchema().getName());
        groupTuple.setContained(current, serInfo.getGroupSchemaIndexTranslation(schemaId));
      }

      handler.reduce(groupTuple, tupleIterator, this.context, collector);

      // Consume any values the handler did not read, so that the last element is
      // correctly set for the next onCloseGroup() call.
      while (pending.hasNext()) {
        pending.next();
      }
    } catch (TupleMRException e) {
      throw new RuntimeException(e);
    }
  }