/**
 * Loads ten items with differing weights into a sketch constructed with argument 8, then runs
 * the UDF once per error type. Both result bags must hold fewer than ten rows, and the
 * NO_FALSE_POSITIVES bag must be strictly smaller than the NO_FALSE_NEGATIVES bag.
 *
 * @throws Exception if sketch serialization or UDF execution fails
 */
@Test
public void estimation() throws Exception {
  final ItemsSketch<String> itemsSketch = new ItemsSketch<String>(8);

  // Items "1" through "9" are added with explicit, decreasing weights.
  final int[] weights = {1000, 500, 200, 100, 50, 20, 10, 5, 2};
  for (int i = 0; i < weights.length; i++) {
    itemsSketch.update(Integer.toString(i + 1), weights[i]);
  }
  // Item "10" goes through the single-argument update.
  itemsSketch.update("10");

  final DataByteArray serialized =
      new DataByteArray(itemsSketch.toByteArray(new ArrayOfStringsSerDe()));
  final Tuple udfInput = PigUtil.objectsToTuple(serialized);

  final EvalFunc<DataBag> noFalsePositives =
      new FrequentStringsSketchToEstimates("NO_FALSE_POSITIVES");
  final DataBag strictBag = noFalsePositives.exec(udfInput);
  Assert.assertNotNull(strictBag);
  Assert.assertTrue(strictBag.size() < 10);

  final EvalFunc<DataBag> noFalseNegatives =
      new FrequentStringsSketchToEstimates("NO_FALSE_NEGATIVES");
  final DataBag lenientBag = noFalseNegatives.exec(udfInput);
  Assert.assertNotNull(lenientBag);
  Assert.assertTrue(lenientBag.size() < 10);

  Assert.assertTrue(strictBag.size() < lenientBag.size());
}
/**
 * Checks that SketchToString yields {@code null} both for a {@code null} tuple and for a
 * tuple created with size 0.
 *
 * @throws IOException if UDF execution fails
 */
@Test
public void testNullEmpty() throws IOException {
  final EvalFunc<String> toStringUdf = new SketchToString("false");

  // Case 1: no tuple at all.
  final Tuple absentTuple = null;
  assertNull(toStringUdf.exec(absentTuple));

  // Case 2: a tuple created with size 0.
  final Tuple zeroSizeTuple = TupleFactory.getInstance().newTuple(0);
  assertNull(toStringUdf.exec(zeroSizeTuple));
}
/**
 * Updates a sketch with "a" twice and "b" once, then verifies the UDF returns two 4-field
 * rows: first "a" with the value 2 in fields 1-3, then "b" with the value 1 in fields 1-3.
 *
 * @throws Exception if sketch serialization or UDF execution fails
 */
@Test
public void exact() throws Exception {
  final ItemsSketch<String> itemsSketch = new ItemsSketch<String>(8);
  itemsSketch.update("a");
  itemsSketch.update("a");
  itemsSketch.update("b");

  final DataByteArray serialized =
      new DataByteArray(itemsSketch.toByteArray(new ArrayOfStringsSerDe()));
  final Tuple udfInput = PigUtil.objectsToTuple(serialized);

  final EvalFunc<DataBag> estimatesUdf = new FrequentStringsSketchToEstimates();
  final DataBag rows = estimatesUdf.exec(udfInput);
  Assert.assertNotNull(rows);
  Assert.assertEquals(rows.size(), 2);

  final Iterator<Tuple> rowIterator = rows.iterator();

  // First row: item "a", seen twice.
  final Tuple rowA = rowIterator.next();
  Assert.assertEquals(rowA.size(), 4);
  Assert.assertEquals((String) rowA.get(0), "a");
  Assert.assertEquals((long) rowA.get(1), 2L);
  Assert.assertEquals((long) rowA.get(2), 2L);
  Assert.assertEquals((long) rowA.get(3), 2L);

  // Second row: item "b", seen once.
  final Tuple rowB = rowIterator.next();
  Assert.assertEquals(rowB.size(), 4);
  Assert.assertEquals((String) rowB.get(0), "b");
  Assert.assertEquals((long) rowB.get(1), 1L);
  Assert.assertEquals((long) rowB.get(2), 1L);
  Assert.assertEquals((long) rowB.get(3), 1L);
}
/**
 * Requests the quantile at fraction 0.0 from a sketch that received no updates and expects
 * {@link Double#POSITIVE_INFINITY} as the result.
 *
 * @throws Exception if UDF execution fails
 */
@Test
public void emptySketch() throws Exception {
  final DoublesSketch untouchedSketch = DoublesSketch.builder().build();
  final DataByteArray serialized = new DataByteArray(untouchedSketch.toByteArray());

  final EvalFunc<Double> quantileUdf = new GetQuantileFromDoublesSketch();
  final Double quantile = quantileUdf.exec(
      tupleFactory.newTuple(Arrays.asList(serialized, 0.0)));

  Assert.assertEquals(quantile, Double.POSITIVE_INFINITY);
}
/**
 * Serializes a sketch that received no updates and verifies the UDF returns a non-null bag
 * of size 0.
 *
 * @throws Exception if sketch serialization or UDF execution fails
 */
@Test
public void emptySketch() throws Exception {
  final EvalFunc<DataBag> estimatesUdf = new FrequentStringsSketchToEstimates();

  final ItemsSketch<String> untouchedSketch = new ItemsSketch<String>(8);
  final DataByteArray serialized =
      new DataByteArray(untouchedSketch.toByteArray(new ArrayOfStringsSerDe()));
  final Tuple udfInput = PigUtil.objectsToTuple(serialized);

  final DataBag rows = estimatesUdf.exec(udfInput);
  Assert.assertNotNull(rows);
  Assert.assertEquals(rows.size(), 0);
}
/**
 * Updates a sketch with the single value 1.0 and verifies that the quantile at fraction 0.5
 * equals 1.0.
 *
 * @throws Exception if UDF execution fails
 */
@Test
public void normalCase() throws Exception {
  final DoublesSketch singleValueSketch = DoublesSketch.builder().build();
  singleValueSketch.update(1.0);
  final DataByteArray serialized = new DataByteArray(singleValueSketch.toByteArray());

  final EvalFunc<Double> quantileUdf = new GetQuantileFromDoublesSketch();
  final Double quantile = quantileUdf.exec(
      tupleFactory.newTuple(Arrays.asList(serialized, 0.5)));

  Assert.assertEquals(quantile, 1.0);
}
/**
 * Runs SketchToString constructed with "true" on the output of
 * {@code createDbaFromQssRange(64, 0, 64)} and verifies the text contains both the
 * "SUMMARY" and the "SKETCH DATA DETAIL" markers.
 *
 * @throws IOException if tuple population or UDF execution fails
 */
@Test
public void testExactWithDetail() throws IOException {
  final TupleFactory factory = TupleFactory.getInstance();
  final Tuple udfInput = factory.newTuple(1);
  udfInput.set(0, createDbaFromQssRange(64, 0, 64));

  final EvalFunc<String> toStringUdf = new SketchToString("true");
  final String rendered = toStringUdf.exec(udfInput);

  assertNotNull(rendered);
  assertTrue(rendered.contains("SUMMARY"));
  assertTrue(rendered.contains("SKETCH DATA DETAIL"));
}
/**
 * Runs SketchToString constructed with "false" and the string form of
 * {@code DEFAULT_UPDATE_SEED} on the output of {@code createDbaFromQssRange(64, 0, 64)}.
 * The text must contain "SUMMARY" but must not contain "SKETCH DATA DETAIL".
 *
 * @throws IOException if tuple population or UDF execution fails
 */
@Test
public void testExactNoDetailWithSeed() throws IOException {
  final String seedText = Long.toString(DEFAULT_UPDATE_SEED);
  final EvalFunc<String> toStringUdf = new SketchToString("false", seedText);

  final TupleFactory factory = TupleFactory.getInstance();
  final Tuple udfInput = factory.newTuple(1);
  udfInput.set(0, createDbaFromQssRange(64, 0, 64));

  final String rendered = toStringUdf.exec(udfInput);

  assertNotNull(rendered);
  assertTrue(rendered.contains("SUMMARY"));
  assertFalse(rendered.contains("SKETCH DATA DETAIL"));
}
/**
 * Verifies that the UDF returns {@code null} when given a tuple created by the
 * no-argument {@code newTuple()} factory call.
 *
 * @throws Exception if UDF execution fails
 */
@Test
public void emptyInput() throws Exception {
  final EvalFunc<DataBag> estimatesUdf = new FrequentStringsSketchToEstimates();
  final TupleFactory factory = TupleFactory.getInstance();

  Assert.assertNull(estimatesUdf.exec(factory.newTuple()));
}
/**
 * Verifies that the UDF returns {@code null} when its input tuple is {@code null}.
 *
 * @throws Exception if UDF execution fails
 */
@Test
public void nullInput() throws Exception {
  final EvalFunc<DataBag> estimatesUdf = new FrequentStringsSketchToEstimates();

  Assert.assertNull(estimatesUdf.exec(null));
}
/**
 * Passes the integer 1 in the second tuple position, where the other tests in this file
 * pass a double, and expects an {@link IllegalArgumentException}.
 *
 * @throws Exception if UDF execution fails in an unexpected way
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void wrongTypeForFraction() throws Exception {
  final DoublesSketch untouchedSketch = DoublesSketch.builder().build();
  final DataByteArray serialized = new DataByteArray(untouchedSketch.toByteArray());

  final EvalFunc<Double> quantileUdf = new GetQuantileFromDoublesSketch();
  // The second element is deliberately an int rather than a double.
  quantileUdf.exec(tupleFactory.newTuple(Arrays.asList(serialized, 1)));
}
/**
 * Passes the double 1.0 in the first tuple position, where the other tests in this file
 * pass a DataByteArray, and expects an {@link IllegalArgumentException}.
 *
 * @throws Exception if UDF execution fails in an unexpected way
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void wrongTypeForSketch() throws Exception {
  final EvalFunc<Double> quantileUdf = new GetQuantileFromDoublesSketch();

  // Both elements are doubles; the first is deliberately not a DataByteArray.
  quantileUdf.exec(
      tupleFactory.newTuple(
          Arrays.asList(1.0, 1.0)));
}
/**
 * Executes the UDF on the tuple produced by {@code tupleFactory.newTuple(1)} and expects an
 * {@link IllegalArgumentException}.
 *
 * @throws Exception if UDF execution fails in an unexpected way
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void wrongNumberOfInputs() throws Exception {
  final EvalFunc<Double> quantileUdf = new GetQuantileFromDoublesSketch();

  quantileUdf.exec(
      tupleFactory.newTuple(1));
}