/**
 * Loads ten distinct items with skewed weights into a sketch constructed with size argument 8,
 * then checks that both error-type modes return fewer than ten rows and that
 * {@code NO_FALSE_POSITIVES} returns strictly fewer rows than {@code NO_FALSE_NEGATIVES}.
 */
@Test
public void estimation() throws Exception {
  final String[] weightedItems = {"1", "2", "3", "4", "5", "6", "7", "8", "9"};
  final long[] weights = {1000, 500, 200, 100, 50, 20, 10, 5, 2};

  ItemsSketch<String> sketch = new ItemsSketch<String>(8);
  for (int i = 0; i < weightedItems.length; i++) {
    sketch.update(weightedItems[i], weights[i]);
  }
  // The tenth item goes through the single-argument update.
  sketch.update("10");

  DataByteArray serialized = new DataByteArray(sketch.toByteArray(new ArrayOfStringsSerDe()));
  Tuple inputTuple = PigUtil.objectsToTuple(serialized);

  EvalFunc<DataBag> noFalsePositivesFunc =
      new FrequentStringsSketchToEstimates("NO_FALSE_POSITIVES");
  DataBag noFalsePositives = noFalsePositivesFunc.exec(inputTuple);
  Assert.assertNotNull(noFalsePositives);
  Assert.assertTrue(noFalsePositives.size() < 10);

  EvalFunc<DataBag> noFalseNegativesFunc =
      new FrequentStringsSketchToEstimates("NO_FALSE_NEGATIVES");
  DataBag noFalseNegatives = noFalseNegativesFunc.exec(inputTuple);
  Assert.assertNotNull(noFalseNegatives);
  Assert.assertTrue(noFalseNegatives.size() < 10);

  Assert.assertTrue(noFalsePositives.size() < noFalseNegatives.size());
}
/**
 * Updates a sketch with "a" twice and "b" once and verifies the resulting bag: two rows of four
 * fields each, where the item comes first and the three following long fields all equal the
 * number of times the item was added.
 */
@Test
public void exact() throws Exception {
  ItemsSketch<String> sketch = new ItemsSketch<String>(8);
  sketch.update("a");
  sketch.update("a");
  sketch.update("b");

  DataByteArray serialized = new DataByteArray(sketch.toByteArray(new ArrayOfStringsSerDe()));
  Tuple inputTuple = PigUtil.objectsToTuple(serialized);

  EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
  DataBag rows = func.exec(inputTuple);
  Assert.assertNotNull(rows);
  Assert.assertEquals(rows.size(), 2);

  // Expected rows in iteration order.
  final String[] expectedItems = {"a", "b"};
  final long[] expectedCounts = {2L, 1L};

  Iterator<Tuple> rowIterator = rows.iterator();
  for (int i = 0; i < expectedItems.length; i++) {
    Tuple row = rowIterator.next();
    Assert.assertEquals(row.size(), 4);
    Assert.assertEquals((String) row.get(0), expectedItems[i]);
    for (int column = 1; column <= 3; column++) {
      Assert.assertEquals((long) row.get(column), expectedCounts[i]);
    }
  }
}
/**
 * Requests fraction 0.0 from a sketch that received no updates; the UDF is expected to return
 * {@link Double#POSITIVE_INFINITY}.
 */
@Test
public void emptySketch() throws Exception {
  DoublesSketch emptyDoublesSketch = DoublesSketch.builder().build();
  DataByteArray serialized = new DataByteArray(emptyDoublesSketch.toByteArray());

  EvalFunc<Double> func = new GetQuantileFromDoublesSketch();
  Double result = func.exec(tupleFactory.newTuple(Arrays.asList(serialized, 0.0)));

  Assert.assertEquals(result, Double.POSITIVE_INFINITY);
}
/**
 * Serializes a sketch that received no updates and verifies that the UDF returns a non-null bag
 * with zero rows.
 */
@Test
public void emptySketch() throws Exception {
  ItemsSketch<String> untouchedSketch = new ItemsSketch<String>(8);
  DataByteArray serialized =
      new DataByteArray(untouchedSketch.toByteArray(new ArrayOfStringsSerDe()));
  Tuple inputTuple = PigUtil.objectsToTuple(serialized);

  EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
  DataBag rows = func.exec(inputTuple);

  Assert.assertNotNull(rows);
  Assert.assertEquals(rows.size(), 0);
}
/**
 * Updates a sketch with the single value 1.0 and verifies that asking for fraction 0.5 returns
 * 1.0.
 */
@Test
public void normalCase() throws Exception {
  DoublesSketch singleValueSketch = DoublesSketch.builder().build();
  singleValueSketch.update(1.0);
  DataByteArray serialized = new DataByteArray(singleValueSketch.toByteArray());

  EvalFunc<Double> func = new GetQuantileFromDoublesSketch();
  Double result = func.exec(tupleFactory.newTuple(Arrays.asList(serialized, 0.5)));

  Assert.assertEquals(result, 1.0);
}
/**
 * Verifies that the UDF returns null both for a null tuple and for a tuple created with
 * {@code newTuple(0)}.
 */
@Test
public void testNullEmpty() throws IOException {
  final EvalFunc<String> toStringUdf = new SketchToString("false");

  final Tuple nullTuple = null;
  assertNull(toStringUdf.exec(nullTuple));

  final Tuple zeroFieldTuple = TupleFactory.getInstance().newTuple(0);
  assertNull(toStringUdf.exec(zeroFieldTuple));
}
/**
 * Runs the UDF constructed with "true" on a sketch built by
 * {@code createDbaFromQssRange(64, 0, 64)} and checks that the output contains both the
 * "SUMMARY" and the "SKETCH DATA DETAIL" sections.
 */
@Test
public void testExactWithDetail() throws IOException {
  final EvalFunc<String> detailedUdf = new SketchToString("true");

  final Tuple sketchTuple = TupleFactory.getInstance().newTuple(1);
  sketchTuple.set(0, createDbaFromQssRange(64, 0, 64));

  final String rendered = detailedUdf.exec(sketchTuple);
  assertNotNull(rendered);

  final boolean hasSummary = rendered.contains("SUMMARY");
  assertTrue(hasSummary);
  final boolean hasDetail = rendered.contains("SKETCH DATA DETAIL");
  assertTrue(hasDetail);
}
/**
 * Runs the UDF constructed with "false" and an explicit seed string on a sketch built by
 * {@code createDbaFromQssRange(64, 0, 64)}; the output must contain "SUMMARY" but not
 * "SKETCH DATA DETAIL".
 */
@Test
public void testExactNoDetailWithSeed() throws IOException {
  final String seedArgument = Long.toString(DEFAULT_UPDATE_SEED);
  final EvalFunc<String> summaryOnlyUdf = new SketchToString("false", seedArgument);

  final Tuple sketchTuple = TupleFactory.getInstance().newTuple(1);
  sketchTuple.set(0, createDbaFromQssRange(64, 0, 64));

  final String rendered = summaryOnlyUdf.exec(sketchTuple);
  assertNotNull(rendered);

  final boolean hasSummary = rendered.contains("SUMMARY");
  assertTrue(hasSummary);
  final boolean hasDetail = rendered.contains("SKETCH DATA DETAIL");
  assertFalse(hasDetail);
}
/**
 * Checks the declared output schema: one BAG field, holding one TUPLE field, holding four
 * fields typed CHARARRAY, LONG, LONG, LONG.
 */
@Test
public void schema() throws Exception {
  EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
  Schema schema = func.outputSchema(null);
  Assert.assertNotNull(schema);
  Assert.assertEquals(schema.size(), 1);

  // Outer level: a single bag.
  Schema.FieldSchema bagField = schema.getField(0);
  Assert.assertEquals(bagField.type, DataType.BAG);
  Schema bagSchema = bagField.schema;
  Assert.assertEquals(bagSchema.size(), 1);

  // Middle level: a single tuple inside the bag.
  Schema.FieldSchema tupleField = bagSchema.getField(0);
  Assert.assertEquals(tupleField.type, DataType.TUPLE);
  Schema tupleSchema = tupleField.schema;
  Assert.assertEquals(tupleSchema.size(), 4);

  // Inner level: one chararray followed by three longs.
  Assert.assertEquals(tupleSchema.getField(0).type, DataType.CHARARRAY);
  for (int column = 1; column <= 3; column++) {
    Assert.assertEquals(tupleSchema.getField(column).type, DataType.LONG);
  }
}
protected static Tuple accumulate(Tuple input, EvalFunc evalFunc) throws ExecException, NumberFormatException { DataBag values = (DataBag) input.get(0); int numberOfTheColumns = values.iterator().next().size(); Tuple tupTmp = createInitTuple(input); long progressCounter = 0; for (Iterator<Tuple> it = values.iterator(); it.hasNext(); ) { Tuple t = it.next(); if ((++progressCounter % 1000) == 0) { progressCounter = 0; evalFunc.progress(); } if (isTupleMarked(t)) { // removeTheMarker(t); } else { t = buildInitialTupleForTheRow(t); } mergeResultsIntoAggregation(tupTmp, t); } markTheTuple(tupTmp); return tupTmp; }
/**
 * Verifies that executing the UDF on a tuple created with the no-argument {@code newTuple()}
 * yields null.
 */
@Test
public void emptyInput() throws Exception {
  TupleFactory factory = TupleFactory.getInstance();
  EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();

  DataBag result = func.exec(factory.newTuple());

  Assert.assertNull(result);
}
/** Verifies that executing the UDF on a null tuple yields null. */
@Test
public void nullInput() throws Exception {
  EvalFunc<DataBag> estimatesUdf = new FrequentStringsSketchToEstimates();

  DataBag result = estimatesUdf.exec(null);

  Assert.assertNull(result);
}
@SuppressWarnings("null") @Test public void outputSchemaTest() throws IOException { EvalFunc<String> udf = new SketchToString(); Schema inputSchema = null; Schema.FieldSchema inputFieldSchema = new Schema.FieldSchema("Sketch", DataType.BYTEARRAY); Schema nullOutputSchema = null; Schema outputSchema = null; Schema.FieldSchema outputOuterFs0 = null; Schema outputInnerSchema = null; Schema.FieldSchema outputInnerFs0 = null; inputSchema = new Schema(inputFieldSchema); nullOutputSchema = udf.outputSchema(null); outputSchema = udf.outputSchema(inputSchema); outputOuterFs0 = outputSchema.getField(0); outputInnerSchema = outputOuterFs0.schema; outputInnerFs0 = outputInnerSchema.getField(0); Assert.assertNull(nullOutputSchema, "Should be null"); Assert.assertNotNull(outputOuterFs0, "outputSchema.getField(0) schema may not be null"); String expected = "tuple"; String result = DataType.findTypeName(outputOuterFs0.type); Assert.assertEquals(result, expected); expected = "chararray"; Assert.assertNotNull(outputInnerFs0, "innerSchema.getField(0) schema may not be null"); result = DataType.findTypeName(outputInnerFs0.type); Assert.assertEquals(result, expected); // print schemas // @formatter:off StringBuilder sb = new StringBuilder(); sb.append("input schema: ") .append(inputSchema) .append(LS) .append("output schema: ") .append(outputSchema) .append(LS) .append("outputOuterFs: ") .append(outputOuterFs0) .append(", type: ") .append(DataType.findTypeName(outputOuterFs0.type)) .append(LS) .append("outputInnerSchema: ") .append(outputInnerSchema) .append(LS) .append("outputInnerFs0: ") .append(outputInnerFs0) .append(", type: ") .append(DataType.findTypeName(outputInnerFs0.type)) .append(LS); println(sb.toString()); // @formatter:on // end print schemas }
/**
 * Passes a valid serialized sketch together with the integer 1 as the second field and expects
 * an {@link IllegalArgumentException}.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void wrongTypeForFraction() throws Exception {
  DoublesSketch emptyDoublesSketch = DoublesSketch.builder().build();
  DataByteArray serialized = new DataByteArray(emptyDoublesSketch.toByteArray());

  EvalFunc<Double> func = new GetQuantileFromDoublesSketch();
  // The int literal is boxed to Integer inside the list.
  func.exec(tupleFactory.newTuple(Arrays.asList(serialized, 1)));
}
/**
 * Passes a double (1.0) in the first field, where the other tests in this section supply a
 * serialized sketch, and expects an {@link IllegalArgumentException}.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void wrongTypeForSketch() throws Exception {
  EvalFunc<Double> quantileUdf = new GetQuantileFromDoublesSketch();

  quantileUdf.exec(
      tupleFactory.newTuple(
          Arrays.asList(1.0, 1.0)));
}
/**
 * Executes the UDF on the tuple produced by {@code newTuple(1)} (presumably a one-field tuple;
 * confirm against the factory) and expects an {@link IllegalArgumentException}.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void wrongNumberOfInputs() throws Exception {
  EvalFunc<Double> quantileUdf = new GetQuantileFromDoublesSketch();

  quantileUdf.exec(
      tupleFactory.newTuple(1));
}