Example #1
0
 /**
  * Registers a custom partitioner for this CoGroup operation. The partitioner is applied to the
  * join keys to decide which partition each key is assigned to, and it is evaluated in the same
  * way on both inputs.
  *
  * <p>NOTE: Custom partitioners are only supported for single-field CoGroup keys; they cannot be
  * combined with composite CoGroup keys.
  *
  * @param partitioner The custom partitioner to use. When {@code null}, the validation against
  *     the two key sets is skipped.
  * @return This CoGroup operator, so that further calls can be chained.
  */
 public CoGroupOperator<I1, I2, OUT> withPartitioner(Partitioner<?> partitioner) {
   final boolean mustValidate = partitioner != null;
   if (mustValidate) {
     // Both key sets have to accept the partitioner; the keys of the first input go first.
     keys1.validateCustomPartitioner(partitioner, null);
     keys2.validateCustomPartitioner(partitioner, null);
   }

   // The partitioner is handed to the first input's clean(...) and the result is what is stored.
   final Partitioner<?> cleaned = getInput1().clean(partitioner);
   this.customPartitioner = cleaned;
   return this;
 }
Example #2
0
  /**
   * Creates a CoGroup operator over two inputs.
   *
   * <p>If an input is a {@code SolutionSetPlaceHolder}, the matching keys must be {@code
   * ExpressionKeys}; their logical key positions are then checked against the placeholder via
   * {@code checkJoinKeyFields}.
   *
   * @param input1 The first input data set.
   * @param input2 The second input data set.
   * @param keys1 The keys of the first input; must not be {@code null}.
   * @param keys2 The keys of the second input; must not be {@code null}.
   * @param function The CoGroup function; stored and passed to the UDF analysis.
   * @param returnType The type information of the operator's result.
   * @param groupSortKeyOrderFirst The group sort key/order pairs for the first input; {@code
   *     null} is replaced by an empty list.
   * @param groupSortKeyOrderSecond The group sort key/order pairs for the second input; {@code
   *     null} is replaced by an empty list.
   * @param customPartitioner The custom partitioner; stored as given, without validation.
   * @param defaultName The default name; stored and passed to the UDF analysis.
   * @throws NullPointerException If {@code keys1} or {@code keys2} is {@code null}.
   * @throws InvalidProgramException If an input is a solution set placeholder and its keys are
   *     not {@code ExpressionKeys}.
   */
  public CoGroupOperator(
      DataSet<I1> input1,
      DataSet<I2> input2,
      Keys<I1> keys1,
      Keys<I2> keys2,
      CoGroupFunction<I1, I2, OUT> function,
      TypeInformation<OUT> returnType,
      List<Pair<Integer, Order>> groupSortKeyOrderFirst,
      List<Pair<Integer, Order>> groupSortKeyOrderSecond,
      Partitioner<?> customPartitioner,
      String defaultName) {
    super(input1, input2, returnType);

    // Fail fast, and say which key set is missing, before any state is stored.
    if (keys1 == null) {
      throw new NullPointerException("The keys of the first co-group input must not be null.");
    }
    if (keys2 == null) {
      throw new NullPointerException("The keys of the second co-group input must not be null.");
    }

    this.function = function;
    this.customPartitioner = customPartitioner;
    this.defaultName = defaultName;

    // A missing group sort order is normalized to an empty list so later code can iterate freely.
    this.groupSortKeyOrderFirst =
        groupSortKeyOrderFirst == null
            ? Collections.<Pair<Integer, Order>>emptyList()
            : groupSortKeyOrderFirst;
    this.groupSortKeyOrderSecond =
        groupSortKeyOrderSecond == null
            ? Collections.<Pair<Integer, Order>>emptyList()
            : groupSortKeyOrderSecond;

    // sanity check solution set key mismatches
    if (input1 instanceof SolutionSetPlaceHolder) {
      if (keys1 instanceof ExpressionKeys) {
        int[] positions = keys1.computeLogicalKeyPositions();
        ((SolutionSetPlaceHolder<?>) input1).checkJoinKeyFields(positions);
      } else {
        throw new InvalidProgramException(
            "Currently, the solution set may only be CoGrouped with using tuple field positions.");
      }
    }
    if (input2 instanceof SolutionSetPlaceHolder) {
      if (keys2 instanceof ExpressionKeys) {
        int[] positions = keys2.computeLogicalKeyPositions();
        ((SolutionSetPlaceHolder<?>) input2).checkJoinKeyFields(positions);
      } else {
        throw new InvalidProgramException(
            "Currently, the solution set may only be CoGrouped with using tuple field positions.");
      }
    }

    this.keys1 = keys1;
    this.keys2 = keys2;

    UdfOperatorUtils.analyzeDualInputUdf(
        this, CoGroupFunction.class, defaultName, function, keys1, keys2);
  }
Example #3
0
 /**
  * Registers a custom partitioner for the CoGroup operation. The partitioner is applied to the
  * join keys to decide which partition each key is assigned to, and it is evaluated in the same
  * way on both inputs.
  *
  * <p>NOTE: Custom partitioners are only supported for single-field CoGroup keys; they cannot be
  * combined with composite CoGroup keys.
  *
  * @param partitioner The custom partitioner to use. When {@code null}, the validation against
  *     the two key sets is skipped.
  * @return This CoGroup operator, so that further calls can be chained.
  */
 public CoGroupOperatorWithoutFunction withPartitioner(Partitioner<?> partitioner) {
   final boolean mustValidate = partitioner != null;
   if (mustValidate) {
     // Both key sets have to accept the partitioner; the keys of the first input go first.
     keys1.validateCustomPartitioner(partitioner, null);
     keys2.validateCustomPartitioner(partitioner, null);
   }

   // The partitioner is handed to input1's clean(...) and the result is what is stored.
   final Partitioner<?> cleaned = input1.clean(partitioner);
   this.customPartitioner = cleaned;
   return this;
 }
Example #4
0
      /**
       * Validates the keys of the second input against the keys of the first input and creates
       * the next step of the CoGroup transformation.
       *
       * <p>The returned object is an intermediate step of a CoGroup transformation. To continue
       * the CoGroup transformation, provide a {@link
       * org.apache.flink.api.common.functions.RichCoGroupFunction} by calling {@link
       * org.apache.flink.api.java.operators.CoGroupOperator.CoGroupOperatorSets.CoGroupOperatorSetsPredicate.CoGroupOperatorWithoutFunction#with(org.apache.flink.api.common.functions.CoGroupFunction)}.
       *
       * @param keys2 The keys of the second CoGroup input.
       * @return A new {@code CoGroupOperatorWithoutFunction} created from {@code keys2}.
       * @throws NullPointerException If {@code keys2} is {@code null}.
       * @throws InvalidProgramException If {@code keys2} is empty, or if the compatibility check
       *     against the first input's keys raises an {@code IncompatibleKeysException}.
       */
      private CoGroupOperatorWithoutFunction createCoGroupOperator(Keys<I2> keys2) {
        if (keys2 == null) {
          throw new NullPointerException("The co-group keys must not be null.");
        }

        if (keys2.isEmpty()) {
          throw new InvalidProgramException("The co-group keys must not be empty.");
        }

        // Only the exception matters here; the return value of areCompatible is not used.
        try {
          keys1.areCompatible(keys2);
        } catch (IncompatibleKeysException ike) {
          throw new InvalidProgramException(
              "The pair of co-group keys are not compatible with each other.", ike);
        }

        return new CoGroupOperatorWithoutFunction(keys2);
      }
Example #5
0
      /**
       * Creates the predicate step and stores the given keys as {@code keys1}.
       *
       * @param keys1 The co-group keys to store; must be neither {@code null} nor empty.
       * @throws NullPointerException If {@code keys1} is {@code null}.
       * @throws InvalidProgramException If {@code keys1} is empty.
       */
      private CoGroupOperatorSetsPredicate(Keys<I1> keys1) {
        if (keys1 == null) {
          throw new NullPointerException("The co-group keys must not be null.");
        }

        if (keys1.isEmpty()) {
          throw new InvalidProgramException("The co-group keys must not be empty.");
        }

        this.keys1 = keys1;
      }
Example #6
0
        /**
         * Creates the intermediate step, stores the given keys as {@code keys2}, and initializes
         * both group sort order lists as new, empty {@code ArrayList}s.
         *
         * @param keys2 The co-group keys to store; must be neither {@code null} nor empty.
         * @throws NullPointerException If {@code keys2} is {@code null}.
         * @throws InvalidProgramException If {@code keys2} is empty.
         */
        private CoGroupOperatorWithoutFunction(Keys<I2> keys2) {
          if (keys2 == null) {
            throw new NullPointerException("The co-group keys must not be null.");
          }
          if (keys2.isEmpty()) {
            throw new InvalidProgramException("The co-group keys must not be empty.");
          }

          this.keys2 = keys2;

          this.groupSortKeyOrderFirst = new ArrayList<>();
          this.groupSortKeyOrderSecond = new ArrayList<>();
        }
Example #7
0
  /**
   * Translates this CoGroup operator into a {@code CoGroupOperatorBase}.
   *
   * <p>The key compatibility check runs once up front. The base operator is then built according
   * to the kinds of {@code keys1} and {@code keys2}: selector-function keys on both sides, on the
   * second side only, on the first side only, or expression keys on both sides. Parallelism, the
   * custom partitioner and any group orders are applied to the result afterwards, identically
   * for every branch.
   *
   * @param input1 The translated first input.
   * @param input2 The translated second input.
   * @return The configured base operator.
   * @throws InvalidProgramException If the compatibility check of the two key sets raises an
   *     {@code IncompatibleKeysException}.
   * @throws UnsupportedOperationException If the key kinds match none of the handled
   *     combinations.
   */
  @Override
  protected org.apache.flink.api.common.operators.base.CoGroupOperatorBase<?, ?, OUT, ?>
      translateToDataFlow(Operator<I1> input1, Operator<I2> input2) {

    String name = getName() != null ? getName() : "CoGroup at " + defaultName;

    // Checked once here for all key combinations; the branches below do not repeat it.
    try {
      keys1.areCompatible(keys2);
    } catch (IncompatibleKeysException e) {
      throw new InvalidProgramException("The types of the key fields do not match.", e);
    }

    final org.apache.flink.api.common.operators.base.CoGroupOperatorBase<?, ?, OUT, ?> po;

    if (keys1 instanceof SelectorFunctionKeys && keys2 instanceof SelectorFunctionKeys) {

      @SuppressWarnings("unchecked")
      SelectorFunctionKeys<I1, ?> selectorKeys1 = (SelectorFunctionKeys<I1, ?>) keys1;
      @SuppressWarnings("unchecked")
      SelectorFunctionKeys<I2, ?> selectorKeys2 = (SelectorFunctionKeys<I2, ?>) keys2;

      po =
          translateSelectorFunctionCoGroup(
              selectorKeys1, selectorKeys2, function, getResultType(), name, input1, input2);
    } else if (keys2 instanceof SelectorFunctionKeys) {

      int[] logicalKeyPositions1 = keys1.computeLogicalKeyPositions();

      @SuppressWarnings("unchecked")
      SelectorFunctionKeys<I2, ?> selectorKeys2 = (SelectorFunctionKeys<I2, ?>) keys2;

      po =
          translateSelectorFunctionCoGroupRight(
              logicalKeyPositions1,
              selectorKeys2,
              function,
              getInput1Type(),
              getResultType(),
              name,
              input1,
              input2);
    } else if (keys1 instanceof SelectorFunctionKeys) {

      @SuppressWarnings("unchecked")
      SelectorFunctionKeys<I1, ?> selectorKeys1 = (SelectorFunctionKeys<I1, ?>) keys1;

      int[] logicalKeyPositions2 = keys2.computeLogicalKeyPositions();

      po =
          translateSelectorFunctionCoGroupLeft(
              selectorKeys1,
              logicalKeyPositions2,
              function,
              getInput2Type(),
              getResultType(),
              name,
              input1,
              input2);
    } else if (keys1 instanceof Keys.ExpressionKeys && keys2 instanceof Keys.ExpressionKeys) {

      int[] logicalKeyPositions1 = keys1.computeLogicalKeyPositions();
      int[] logicalKeyPositions2 = keys2.computeLogicalKeyPositions();

      CoGroupOperatorBase<I1, I2, OUT, CoGroupFunction<I1, I2, OUT>> op =
          new CoGroupOperatorBase<>(
              function,
              new BinaryOperatorInformation<>(getInput1Type(), getInput2Type(), getResultType()),
              logicalKeyPositions1,
              logicalKeyPositions2,
              name);

      op.setFirstInput(input1);
      op.setSecondInput(input2);
      po = op;
    } else {
      throw new UnsupportedOperationException("Unrecognized or incompatible key types.");
    }

    // configure shared characteristics (applied exactly once, whichever branch built the operator)
    po.setParallelism(getParallelism());
    po.setCustomPartitioner(customPartitioner);

    if (!groupSortKeyOrderFirst.isEmpty()) {
      po.setGroupOrderForInputOne(toGroupOrdering(groupSortKeyOrderFirst));
    }
    if (!groupSortKeyOrderSecond.isEmpty()) {
      po.setGroupOrderForInputTwo(toGroupOrdering(groupSortKeyOrderSecond));
    }

    return po;
  }

  /**
   * Builds an {@code Ordering} from the given pairs, appending them in list order. For each pair
   * the left value and the right value are passed to {@code appendOrdering}, with {@code null} as
   * the middle argument.
   */
  private static Ordering toGroupOrdering(List<Pair<Integer, Order>> sortKeyOrders) {
    Ordering ordering = new Ordering();
    for (Pair<Integer, Order> entry : sortKeyOrders) {
      ordering.appendOrdering(entry.getLeft(), null, entry.getRight());
    }
    return ordering;
  }