/**
 * Builds and runs a test pipeline that compares the estimate produced by
 * {@code ApproximateUnique.globally(sampleSize)} with the exact number of distinct elements.
 *
 * <p>The exact count is computed by de-duplicating the input and counting what remains; it is
 * fed to the comparison step as a singleton side input. Each estimate is paired with the exact
 * count as {@code KV.of(estimate, exact)} and handed to {@code VerifyEstimatePerKeyFn}, which is
 * expected to check that the estimation error stays within the maximum allowed error of
 * {@code 2/sqrt(sampleSize)}.
 *
 * @param sampleSize the sample size given to both {@code ApproximateUnique.globally} and
 *     {@code VerifyEstimatePerKeyFn}
 */
private void runApproximateUniquePipeline(int sampleSize) {
  Pipeline pipeline = TestPipeline.create();
  PCollection<String> input = readPCollection(pipeline);

  // Exact distinct count: drop duplicates, count the survivors, expose as a singleton view.
  PCollection<String> distinctElements = input.apply(RemoveDuplicates.<String>create());
  PCollection<Long> exactCount =
      distinctElements.apply(Combine.globally(new CountElements<String>()));
  final PCollectionView<Long> exactView = exactCount.apply(View.<Long>asSingleton());

  // Estimated distinct count over the very same input.
  PCollection<Long> estimate = input.apply(ApproximateUnique.<String>globally(sampleSize));

  // Key = estimate, value = exact count read from the side input.
  DoFn<Long, KV<Long, Long>> pairWithExact =
      new DoFn<Long, KV<Long, Long>>() {
        @Override
        public void processElement(ProcessContext c) {
          Long exactValue = c.sideInput(exactView);
          c.output(KV.of(c.element(), exactValue));
        }
      };
  PCollection<KV<Long, Long>> estimateAndExact =
      estimate.apply(ParDo.of(pairWithExact).withSideInputs(exactView));

  DataflowAssert.that(estimateAndExact).satisfies(new VerifyEstimatePerKeyFn(sampleSize));

  pipeline.run();
}
/**
 * Demo entry point: calls {@code removeDuplicates} on a fixed sample array and prints the value
 * it returns to standard output.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
  int[] sample = {1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 4, 5};
  RemoveDuplicates solver = new RemoveDuplicates();
  System.out.println(solver.removeDuplicates(sample));
}
/**
 * Demo entry point: calls {@code removeDuplicatesUp} on a fixed sample array and prints the
 * value it returns to standard output.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
  RemoveDuplicates solver = new RemoveDuplicates();
  int[] sample = new int[] {1, 1, 1, 2, 2, 3, 3, 3};
  System.out.println(solver.removeDuplicatesUp(sample));
}