/**
 * Runs the default-constructed UDF over a serialized sketch that was fed
 * "a" twice and "b" once, and checks that the returned bag holds exactly two
 * four-field rows: "a" first with all three numeric fields equal to 2, then
 * "b" with all three numeric fields equal to 1.
 *
 * @throws Exception if serializing the sketch or executing the UDF fails
 */
@Test
public void exact() throws Exception {
  final EvalFunc<DataBag> udf = new FrequentStringsSketchToEstimates();

  // Three updates over two distinct items.
  final ItemsSketch<String> items = new ItemsSketch<String>(8);
  items.update("a");
  items.update("a");
  items.update("b");

  // The UDF receives the sketch as a single serialized byte-array field.
  final byte[] serialized = items.toByteArray(new ArrayOfStringsSerDe());
  final Tuple input = PigUtil.objectsToTuple(new DataByteArray(serialized));

  final DataBag result = udf.exec(input);
  Assert.assertNotNull(result);
  Assert.assertEquals(result.size(), 2);

  // Expected rows, in the order the bag is expected to yield them.
  final String[] expectedItems = {"a", "b"};
  final long[] expectedCounts = {2L, 1L};

  final Iterator<Tuple> rows = result.iterator();
  for (int i = 0; i < expectedItems.length; i++) {
    final Tuple row = rows.next();
    Assert.assertEquals(row.size(), 4);
    Assert.assertEquals((String) row.get(0), expectedItems[i]);
    // Fields 1..3 must all equal the number of times the item was inserted
    // (presumably estimate, lower bound and upper bound; confirm against the UDF).
    for (int field = 1; field <= 3; field++) {
      Assert.assertEquals((long) row.get(field), expectedCounts[i]);
    }
  }
}
/**
 * Feeds ten distinct items with sharply decreasing weights into a sketch
 * constructed with size argument 8, then runs the UDF in both
 * "NO_FALSE_POSITIVES" and "NO_FALSE_NEGATIVES" modes. Each mode must return
 * fewer than ten rows, and the "NO_FALSE_POSITIVES" result must be strictly
 * smaller than the "NO_FALSE_NEGATIVES" one.
 *
 * @throws Exception if serializing the sketch or executing the UDF fails
 */
@Test
public void estimation() throws Exception {
  // Weights for items "1" through "9"; item "10" is added once, unweighted.
  final long[] weights = {1000, 500, 200, 100, 50, 20, 10, 5, 2};

  final ItemsSketch<String> items = new ItemsSketch<String>(8);
  for (int i = 0; i < weights.length; i++) {
    items.update(Integer.toString(i + 1), weights[i]);
  }
  items.update("10");

  // Both UDF instances are run against the same serialized input.
  final byte[] serialized = items.toByteArray(new ArrayOfStringsSerDe());
  final Tuple input = PigUtil.objectsToTuple(new DataByteArray(serialized));

  final EvalFunc<DataBag> strictUdf =
      new FrequentStringsSketchToEstimates("NO_FALSE_POSITIVES");
  final DataBag strictRows = strictUdf.exec(input);
  Assert.assertNotNull(strictRows);
  Assert.assertTrue(strictRows.size() < 10);

  final EvalFunc<DataBag> lenientUdf =
      new FrequentStringsSketchToEstimates("NO_FALSE_NEGATIVES");
  final DataBag lenientRows = lenientUdf.exec(input);
  Assert.assertNotNull(lenientRows);
  Assert.assertTrue(lenientRows.size() < 10);

  // The no-false-positives mode must report strictly fewer rows.
  Assert.assertTrue(strictRows.size() < lenientRows.size());
}