@Override public boolean isRemove( FlowProcess flowProcess, FilterCall<LinkedHashMap<Tuple, Object>> filterCall) { // we assume its more painful to create lots of tuple copies vs comparisons Tuple args = TupleHasher.wrapTuple(tupleHasher, filterCall.getArguments().getTuple()); switch (include) { case ALL: break; case NO_NULLS: if (Tuples.frequency(args, null) == args.size()) return true; break; } if (filterCall.getContext().containsKey(args)) { flowProcess.increment(Cache.Num_Keys_Hit, 1); return true; } // only do the copy here filterCall .getContext() .put(TupleHasher.wrapTuple(tupleHasher, filterCall.getArguments().getTupleCopy()), null); flowProcess.increment(Cache.Num_Keys_Missed, 1); return false; }
/**
 * Constructor Unique creates a new Unique instance.
 *
 * @param name of type String
 * @param pipes of type Pipe[]
 * @param uniqueFields of type Fields
 * @param include of type Include
 * @param threshold of type int
 */
@ConstructorProperties({"name", "pipes", "uniqueFields", "include", "threshold"})
public Unique(String name, Pipe[] pipes, Fields uniqueFields, Include include, int threshold) {
  super(pipes);

  if (uniqueFields == null) {
    throw new IllegalArgumentException("uniqueFields may not be null");
  }

  // Build a hasher only when the unique fields carry custom comparators.
  Comparator[] fieldComparators = uniqueFields.getComparators();
  TupleHasher hasher =
      TupleHasher.isNull(fieldComparators) ? null : new TupleHasher(null, fieldComparators);

  // A single partial-duplicate filter instance is shared across every incoming branch.
  FilterPartialDuplicates partialDuplicates =
      new FilterPartialDuplicates(include, threshold, hasher);

  Pipe[] filtered = new Pipe[pipes.length];

  for (int i = 0; i < pipes.length; i++) {
    filtered[i] = new Each(pipes[i], uniqueFields, partialDuplicates);
  }

  // Group the pre-filtered branches, then keep only the first tuple per grouping.
  Pipe tail = new GroupBy(name, filtered, uniqueFields);
  tail = new Every(tail, Fields.ALL, new FirstNBuffer(), Fields.RESULTS);

  setTails(tail);
}