/**
   * Scans the fields from {@code minFieldIndex} to {@code maxFieldIndex} (both inclusive) and
   * returns the index of the first one at which the two tuples differ.
   *
   * <p>The loop index is translated, separately for each tuple, through the index translation
   * that {@code serInfo} returns for that tuple's schema, so the two tuples may belong to
   * different schemas. When a custom comparator is registered for an index it alone decides
   * equality (a non-zero {@code compare} result means "different"). Otherwise the values are
   * compared with {@code equals}; that comparison is null-safe: two nulls are considered equal
   * and a null never equals a non-null value.
   *
   * <p>Important: the contract of this method is that the tuples always differ somewhere between
   * {@code minFieldIndex} and {@code maxFieldIndex}.
   *
   * @param tuple1 first tuple to compare
   * @param tuple2 second tuple to compare
   * @param minFieldIndex first index to inspect (inclusive)
   * @param maxFieldIndex last index to inspect (inclusive)
   * @return the first index in the inspected range at which the tuples differ
   * @throws RuntimeException if the tuples compare the same on every inspected index
   */
  private int indexMismatch(ITuple tuple1, ITuple tuple2, int minFieldIndex, int maxFieldIndex) {
    int schemaId1 = tupleMRConfig.getSchemaIdByName(tuple1.getSchema().getName());
    int schemaId2 = tupleMRConfig.getSchemaIdByName(tuple2.getSchema().getName());
    int[] translationTuple1 = serInfo.getGroupSchemaIndexTranslation(schemaId1);
    int[] translationTuple2 = serInfo.getGroupSchemaIndexTranslation(schemaId2);

    for (int i = minFieldIndex; i <= maxFieldIndex; i++) {
      Object obj1 = tuple1.get(translationTuple1[i]);
      Object obj2 = tuple2.get(translationTuple2[i]);
      @SuppressWarnings("unchecked")
      RawComparator<Object> customComparator = (RawComparator<Object>) customComparators[i];

      boolean differs;
      if (customComparator != null) {
        // A registered comparator is authoritative for this field.
        differs = customComparator.compare(obj1, obj2) != 0;
      } else if (obj1 == null) {
        // Avoid a NullPointerException: null only matches null.
        differs = obj2 != null;
      } else {
        differs = !obj1.equals(obj2);
      }

      if (differs) {
        return i;
      }
    }
    throw new RuntimeException(
        "Illegal state.The tuples "
            + tuple1
            + " and "
            + tuple2
            + " compare the same between indexes "
            + minFieldIndex
            + " and "
            + maxFieldIndex);
  }
  /**
   * Handles one reduce call: closes the groups that ended with the previous key, opens the
   * groups that start with the current key, and then delegates to the handler's {@code reduce}.
   *
   * <p>On the first call nothing is closed and groups are opened starting at {@code minDepth}.
   * On later calls the first index at which the previous and current keys differ is computed
   * (never lower than {@code minDepth}); groups are closed from {@code maxDepth} down to that
   * index using the previous key, then opened from that index up to {@code maxDepth} using the
   * current key.
   *
   * @param key wrapper giving access to the current tuple and the previous one
   * @param values values of this reduce call; whatever the handler leaves unread is drained
   * @param context not read by this method; the handler receives {@code this.context}
   * @throws IOException declared by the overridden method
   * @throws InterruptedException declared by the overridden method
   */
  @Override
  public final void reduce(DatumWrapper<ITuple> key, Iterable<NullWritable> values, Context context)
      throws IOException, InterruptedException {

    try {
      Iterator<NullWritable> valuesIt = values.iterator();
      tupleIterator.setIterator(valuesIt);
      ITuple current = key.datum();
      ITuple previous = key.previousDatum();

      // Shallowest group level that changes with this key.
      int startLevel = minDepth;
      if (firstRun) {
        firstRun = false;
      } else {
        startLevel = Math.max(indexMismatch(previous, current, 0, maxDepth), minDepth);

        // Close deepest-first, still reporting the previous key.
        int level = maxDepth;
        while (level >= startLevel) {
          String fieldName = groupSchema.getField(level).getName();
          handler.onCloseGroup(level, fieldName, previous, this.context, collector);
          level--;
        }
      }

      // Open shallowest-first with the current key.
      int level = startLevel;
      while (level <= maxDepth) {
        String fieldName = groupSchema.getField(level).getName();
        handler.onOpenGroup(level, fieldName, current, this.context, collector);
        level++;
      }

      // Expose only a view over the group fields to the handler.
      if (!isMultipleSources) {
        groupTuple.setContained(current);
      } else {
        String schemaName = current.getSchema().getName();
        int[] indexTranslation =
            serInfo.getGroupSchemaIndexTranslation(tupleMRConfig.getSchemaIdByName(schemaName));
        groupTuple.setContained(current, indexTranslation);
      }

      handler.reduce(groupTuple, tupleIterator, this.context, collector);

      // Drain whatever the handler did not consume; the goal is that the last
      // element is correctly set for the next onCloseGroup() call.
      while (valuesIt.hasNext()) {
        valuesIt.next();
      }
    } catch (TupleMRException e) {
      throw new RuntimeException(e);
    }
  }
Exemple #3
0
 /**
  * Fills the fields in the index range {@code minIndex}..{@code maxIndex} (both inclusive),
  * choosing the value according to each field's type.
  *
  * <p>For INT, LONG, BOOLEAN, DOUBLE and FLOAT fields the value is drawn from {@code random}
  * when {@code isRandom} is true, and is the fixed default ({@code 0}, {@code false},
  * {@code 0.0}, {@code 0f}) otherwise. STRING, ENUM and OBJECT fields are delegated to
  * {@code fillString}, {@code fillEnum} and {@code fillObject}.
  *
  * @param isRandom whether to generate random values instead of fixed defaults
  * @param tuple tuple whose fields are written
  * @param minIndex first field index to fill (inclusive)
  * @param maxIndex last field index to fill (inclusive)
  * @throws RuntimeException wrapping any exception raised while filling, including the
  *     {@link IllegalArgumentException} raised for an unsupported field type
  */
 protected static void fillTuple(boolean isRandom, ITuple tuple, int minIndex, int maxIndex) {
   try {
     for (int pos = minIndex; pos <= maxIndex; pos++) {
       Field current = tuple.getSchema().getField(pos);
       switch (current.getType()) {
         case INT: {
           int intValue = isRandom ? random.nextInt() : 0;
           tuple.set(pos, intValue);
           break;
         }
         case LONG: {
           long longValue = isRandom ? random.nextLong() : 0;
           tuple.set(pos, longValue);
           break;
         }
         case BOOLEAN: {
           // Short-circuit keeps the generator untouched when not random.
           boolean flag = isRandom && random.nextBoolean();
           tuple.set(pos, flag);
           break;
         }
         case DOUBLE: {
           double doubleValue = isRandom ? random.nextDouble() : 0.0;
           tuple.set(pos, doubleValue);
           break;
         }
         case FLOAT: {
           float floatValue = isRandom ? random.nextFloat() : 0f;
           tuple.set(pos, floatValue);
           break;
         }
         case STRING:
           fillString(isRandom, tuple, pos);
           break;
         case ENUM:
           fillEnum(isRandom, current, tuple, pos);
           break;
         case OBJECT:
           fillObject(isRandom, tuple, current, pos);
           break;
         default:
           throw new IllegalArgumentException("Not supported type " + current.getType());
       }
     }
   } catch (Exception e) {
     throw new RuntimeException(e);
   }
 }
Exemple #4
0
 /**
  * Fills every field of the tuple, from index 0 to the last field of its schema, by delegating
  * to the ranged {@code fillTuple} overload.
  *
  * @param random whether to generate random values; passed through to the ranged overload
  * @param tuple tuple whose fields are written
  */
 protected static void fillTuple(boolean random, ITuple tuple) {
   int lastIndex = tuple.getSchema().getFields().size() - 1;
   fillTuple(random, tuple, 0, lastIndex);
 }