Exemplo n.º 1
0
    /**
     * Decides whether the current argument tuple should be filtered out.
     *
     * <p>A tuple is removed when either (a) {@code include} is {@code NO_NULLS} and the number of
     * {@code null} values in the tuple equals its size, or (b) an equal key is already present in
     * the context map. Otherwise a copy of the arguments is stored in the context map as a new key
     * and the tuple is kept.
     *
     * <p>Increments {@code Cache.Num_Keys_Hit} when the key is already present and
     * {@code Cache.Num_Keys_Missed} when a new key is stored.
     *
     * @param flowProcess used to increment the hit/miss counters
     * @param filterCall supplies the argument tuple and the context map of previously seen keys
     * @return {@code true} if the tuple should be removed, {@code false} if it should be kept
     */
    @Override
    public boolean isRemove(
        FlowProcess flowProcess, FilterCall<LinkedHashMap<Tuple, Object>> filterCall) {
      // Probe with the live (uncopied) tuple first: comparisons are assumed to be cheaper than
      // producing a tuple copy for every single call.
      Tuple lookupKey = TupleHasher.wrapTuple(tupleHasher, filterCall.getArguments().getTuple());

      switch (include) {
        case ALL:
          break;

        case NO_NULLS:
          {
            boolean everyValueIsNull = Tuples.frequency(lookupKey, null) == lookupKey.size();

            if (everyValueIsNull) {
              return true;
            }

            break;
          }
      }

      LinkedHashMap<Tuple, Object> seenKeys = filterCall.getContext();

      if (seenKeys.containsKey(lookupKey)) {
        flowProcess.increment(Cache.Num_Keys_Hit, 1);
        return true;
      }

      // The copy is made only now, when the key actually has to be retained in the map.
      Tuple storedKey = TupleHasher.wrapTuple(tupleHasher, filterCall.getArguments().getTupleCopy());
      seenKeys.put(storedKey, null);

      flowProcess.increment(Cache.Num_Keys_Missed, 1);

      return false;
    }
Exemplo n.º 2
0
  /**
   * Creates a new Unique instance over the given pipes.
   *
   * <p>Each incoming pipe is wrapped in an {@code Each} that applies one shared
   * {@code FilterPartialDuplicates} filter to {@code uniqueFields}. The filtered pipes are then
   * merged by a {@code GroupBy} on {@code uniqueFields}, followed by an {@code Every} that applies
   * a {@code FirstNBuffer}; that final pipe becomes the tail of this assembly.
   *
   * <p>If {@code uniqueFields} carries comparators, a {@code TupleHasher} built from them is
   * handed to the filter; otherwise the filter receives {@code null} for the hasher.
   *
   * @param name of type String, used as the name of the GroupBy
   * @param pipes of type Pipe[], the incoming pipes
   * @param uniqueFields of type Fields, the fields to filter and group on; may not be null
   * @param include of type Include, forwarded to the FilterPartialDuplicates filter
   * @param threshold of type int, forwarded to the FilterPartialDuplicates filter
   * @throws IllegalArgumentException if {@code uniqueFields} is null
   */
  @ConstructorProperties({"name", "pipes", "uniqueFields", "include", "threshold"})
  public Unique(String name, Pipe[] pipes, Fields uniqueFields, Include include, int threshold) {
    super(pipes);

    if (uniqueFields == null) {
      throw new IllegalArgumentException("uniqueFields may not be null");
    }

    Pipe[] filteredPipes = new Pipe[pipes.length];

    // A hasher is only needed when the unique fields declare custom comparators.
    Comparator[] fieldComparators = uniqueFields.getComparators();
    TupleHasher hasher =
        TupleHasher.isNull(fieldComparators) ? null : new TupleHasher(null, fieldComparators);

    // One filter instance is shared by every branch.
    FilterPartialDuplicates duplicateFilter =
        new FilterPartialDuplicates(include, threshold, hasher);

    int slot = 0;
    for (Pipe incoming : pipes) {
      filteredPipes[slot] = new Each(incoming, uniqueFields, duplicateFilter);
      slot++;
    }

    Pipe grouped = new GroupBy(name, filteredPipes, uniqueFields);
    Pipe tail = new Every(grouped, Fields.ALL, new FirstNBuffer(), Fields.RESULTS);

    setTails(tail);
  }