/**
   * Builds and runs a test pipeline that compares {@code ApproximateUnique(sampleSize)} against
   * the exact distinct-element count of the same input collection.
   *
   * <p>The exact count is produced by {@code RemoveDuplicates} followed by a global
   * {@code CountElements} combine, and is published as a singleton side input. Every approximate
   * estimate is emitted as a {@code KV} of (estimate, exact count), and the resulting collection
   * is handed to {@code VerifyEstimatePerKeyFn(sampleSize)}, which is expected to check that the
   * estimation error stays within the maximum allowed error of {@code 2/sqrt(sampleSize)}.
   *
   * @param sampleSize the sample size given to {@code ApproximateUnique} and to the verifier
   */
  private void runApproximateUniquePipeline(int sampleSize) {
    Pipeline pipeline = TestPipeline.create();
    PCollection<String> input = readPCollection(pipeline);

    // Exact distinct count, exposed as a singleton view so the DoFn below can read it.
    PCollection<String> distinctElements = input.apply(RemoveDuplicates.<String>create());
    PCollection<Long> exactCount =
        distinctElements.apply(Combine.globally(new CountElements<String>()));
    final PCollectionView<Long> exactCountView = exactCount.apply(View.<Long>asSingleton());

    PCollection<Long> estimate = input.apply(ApproximateUnique.<String>globally(sampleSize));

    // Pairs each estimate (key) with the exact count read from the side input (value).
    DoFn<Long, KV<Long, Long>> pairWithExact =
        new DoFn<Long, KV<Long, Long>>() {
          @Override
          public void processElement(ProcessContext c) {
            Long approximateCount = c.element();
            Long exactValue = c.sideInput(exactCountView);
            c.output(KV.of(approximateCount, exactValue));
          }
        };
    PCollection<KV<Long, Long>> estimateAndExact =
        estimate.apply(ParDo.of(pairWithExact).withSideInputs(exactCountView));

    DataflowAssert.that(estimateAndExact).satisfies(new VerifyEstimatePerKeyFn(sampleSize));

    pipeline.run();
  }
Exemplo n.º 2
0
 /**
  * Demo entry point: calls {@code removeDuplicates} on a fixed sample array and prints whatever
  * that call returns to standard output.
  *
  * @param args command-line arguments (unused)
  */
 public static void main(String[] args) {
   RemoveDuplicates remover = new RemoveDuplicates();
   int[] sample = {1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 4, 5};
   // NOTE(review): the return type of removeDuplicates is not visible here, so the result is
   // passed straight to println rather than stored in a typed local.
   System.out.println(remover.removeDuplicates(sample));
 }
 /**
  * Demo entry point: calls {@code removeDuplicatesUp} on a fixed sample array and prints
  * whatever that call returns to standard output.
  *
  * @param args command-line arguments (unused)
  */
 public static void main(String[] args) {
   final int[] input = new int[] {1, 1, 1, 2, 2, 3, 3, 3};
   // The array is built before the instance, matching the original evaluation order.
   System.out.println(new RemoveDuplicates().removeDuplicatesUp(input));
 }