@Test
  public void estimation() throws Exception {
    // Ten distinct items with steeply decreasing weights go into a sketch constructed with 8.
    // Items "1".."9" carry an explicit weight; "10" uses the single-argument update.
    final long[] weights = {1000, 500, 200, 100, 50, 20, 10, 5, 2};
    final ItemsSketch<String> freqSketch = new ItemsSketch<String>(8);
    for (int i = 0; i < weights.length; i++) {
      freqSketch.update(String.valueOf(i + 1), weights[i]);
    }
    freqSketch.update("10");

    final DataByteArray serialized =
        new DataByteArray(freqSketch.toByteArray(new ArrayOfStringsSerDe()));
    final Tuple sketchTuple = PigUtil.objectsToTuple(serialized);

    // Both error types must report fewer than the ten items that were fed in.
    final EvalFunc<DataBag> noFalsePositivesFunc =
        new FrequentStringsSketchToEstimates("NO_FALSE_POSITIVES");
    final DataBag noFalsePositives = noFalsePositivesFunc.exec(sketchTuple);
    Assert.assertNotNull(noFalsePositives);
    Assert.assertTrue(noFalsePositives.size() < 10);

    final EvalFunc<DataBag> noFalseNegativesFunc =
        new FrequentStringsSketchToEstimates("NO_FALSE_NEGATIVES");
    final DataBag noFalseNegatives = noFalseNegativesFunc.exec(sketchTuple);
    Assert.assertNotNull(noFalseNegatives);
    Assert.assertTrue(noFalseNegatives.size() < 10);

    // The NO_FALSE_POSITIVES result is expected to be strictly smaller than NO_FALSE_NEGATIVES.
    Assert.assertTrue(noFalsePositives.size() < noFalseNegatives.size());
  }
  /**
   * Updates a sketch with "a" twice and "b" once and checks that the UDF returns two 4-field
   * rows, "a" first, where fields 1..3 all equal the item's update count.
   */
  @Test
  public void exact() throws Exception {
    final String[] expectedItems = {"a", "b"};
    final long[] expectedCounts = {2L, 1L};

    EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
    ItemsSketch<String> freqSketch = new ItemsSketch<String>(8);
    freqSketch.update("a");
    freqSketch.update("a");
    freqSketch.update("b");

    DataByteArray serialized =
        new DataByteArray(freqSketch.toByteArray(new ArrayOfStringsSerDe()));
    DataBag rows = func.exec(PigUtil.objectsToTuple(serialized));
    Assert.assertNotNull(rows);
    Assert.assertEquals(rows.size(), 2);

    // Walk the bag in iteration order and compare each row against the expectation table.
    Iterator<Tuple> rowIterator = rows.iterator();
    for (int i = 0; i < expectedItems.length; i++) {
      Tuple row = rowIterator.next();
      Assert.assertEquals(row.size(), 4);
      Assert.assertEquals((String) row.get(0), expectedItems[i]);
      for (int column = 1; column <= 3; column++) {
        Assert.assertEquals((long) row.get(column), expectedCounts[i]);
      }
    }
  }
 /** Asking a never-updated sketch for fraction 0.0 is expected to give positive infinity. */
 @Test
 public void emptySketch() throws Exception {
   final double fraction = 0.0;
   DataByteArray serialized = new DataByteArray(DoublesSketch.builder().build().toByteArray());

   EvalFunc<Double> func = new GetQuantileFromDoublesSketch();
   Double quantile = func.exec(tupleFactory.newTuple(Arrays.asList(serialized, fraction)));
   Assert.assertEquals(quantile, Double.POSITIVE_INFINITY);
 }
 /** A sketch that never received an update must yield a non-null bag with no rows. */
 @Test
 public void emptySketch() throws Exception {
   ItemsSketch<String> untouched = new ItemsSketch<String>(8);
   DataByteArray serialized = new DataByteArray(untouched.toByteArray(new ArrayOfStringsSerDe()));
   Tuple sketchTuple = PigUtil.objectsToTuple(serialized);

   EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
   DataBag rows = func.exec(sketchTuple);
   Assert.assertNotNull(rows);
   Assert.assertEquals(rows.size(), 0);
 }
 /** A sketch holding only the value 1.0 is expected to report 1.0 for fraction 0.5. */
 @Test
 public void normalCase() throws Exception {
   final double fraction = 0.5;
   DoublesSketch singleValue = DoublesSketch.builder().build();
   singleValue.update(1.0);
   DataByteArray serialized = new DataByteArray(singleValue.toByteArray());

   EvalFunc<Double> func = new GetQuantileFromDoublesSketch();
   Double quantile = func.exec(tupleFactory.newTuple(Arrays.asList(serialized, fraction)));
   Assert.assertEquals(quantile, 1.0);
 }
  /** Both a null tuple and a tuple created with size 0 must make the UDF return null. */
  @Test
  public void testNullEmpty() throws IOException {
    final EvalFunc<String> func = new SketchToString("false");

    // Null input tuple.
    assertNull(func.exec((Tuple) null));

    // Tuple created via newTuple(0).
    assertNull(func.exec(TupleFactory.getInstance().newTuple(0)));
  }
  /** With the "true" constructor argument the output must contain summary and detail sections. */
  @Test
  public void testExactWithDetail() throws IOException {
    final EvalFunc<String> func = new SketchToString("true");

    final Tuple sketchTuple = TupleFactory.getInstance().newTuple(1);
    sketchTuple.set(0, createDbaFromQssRange(64, 0, 64));

    final String rendered = func.exec(sketchTuple);
    assertNotNull(rendered);

    final boolean hasSummary = rendered.contains("SUMMARY");
    final boolean hasDetail = rendered.contains("SKETCH DATA DETAIL");
    assertTrue(hasSummary);
    assertTrue(hasDetail);
  }
  /**
   * With "false" plus an explicit seed string the output must contain the summary section but
   * not the detail section.
   */
  @Test
  public void testExactNoDetailWithSeed() throws IOException {
    final String seedArg = Long.toString(DEFAULT_UPDATE_SEED);
    final EvalFunc<String> func = new SketchToString("false", seedArg);

    final Tuple sketchTuple = TupleFactory.getInstance().newTuple(1);
    sketchTuple.set(0, createDbaFromQssRange(64, 0, 64));

    final String rendered = func.exec(sketchTuple);
    assertNotNull(rendered);

    final boolean hasSummary = rendered.contains("SUMMARY");
    final boolean hasDetail = rendered.contains("SKETCH DATA DETAIL");
    assertTrue(hasSummary);
    assertFalse(hasDetail);
  }
 /**
  * Checks the advertised output schema: one BAG field wrapping one TUPLE field whose four
  * columns are CHARARRAY followed by three LONGs.
  */
 @Test
 public void schema() throws Exception {
   EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
   Schema outer = func.outputSchema(null);
   Assert.assertNotNull(outer);
   Assert.assertEquals(outer.size(), 1);

   // Outermost level: a single bag.
   Schema.FieldSchema bagField = outer.getField(0);
   Assert.assertEquals(bagField.type, DataType.BAG);
   Assert.assertEquals(bagField.schema.size(), 1);

   // Inside the bag: a single tuple with four columns.
   Schema.FieldSchema tupleField = bagField.schema.getField(0);
   Assert.assertEquals(tupleField.type, DataType.TUPLE);
   Schema columns = tupleField.schema;
   Assert.assertEquals(columns.size(), 4);

   Assert.assertEquals(columns.getField(0).type, DataType.CHARARRAY);
   for (int i = 1; i <= 3; i++) {
     Assert.assertEquals(columns.getField(i).type, DataType.LONG);
   }
 }
  /**
   * Folds every tuple of the bag stored in field 0 of {@code input} into a single aggregate.
   *
   * <p>The aggregate starts as {@code createInitTuple(input)}. Each bag element that is not
   * already marked (per {@code isTupleMarked}) is first converted with
   * {@code buildInitialTupleForTheRow}; marked elements are merged unchanged, with their marker
   * left in place. The finished aggregate is marked via {@code markTheTuple} before it is
   * returned.
   *
   * <p>If the bag is empty the loop body never runs, so the result is the marked initial tuple
   * (assuming {@code createInitTuple} itself accepts an empty bag; that helper is not visible
   * here, so confirm).
   *
   * @param input tuple whose field 0 is cast to {@link DataBag}
   * @param evalFunc function whose {@code progress()} is invoked once per 1000 processed tuples
   * @return the marked aggregate tuple
   * @throws ExecException declared for the tuple access and helper calls
   * @throws NumberFormatException declared for the helper calls
   */
  protected static Tuple accumulate(Tuple input, EvalFunc<?> evalFunc)
      throws ExecException, NumberFormatException {
    DataBag values = (DataBag) input.get(0);
    Tuple aggregate = createInitTuple(input);

    long tuplesSinceProgress = 0;
    for (Tuple row : values) {
      // Report progress once per 1000 tuples, then restart the count.
      if ((++tuplesSinceProgress % 1000) == 0) {
        tuplesSinceProgress = 0;
        evalFunc.progress();
      }
      // Unmarked rows are converted before merging; marked rows are merged as they are.
      if (!isTupleMarked(row)) {
        row = buildInitialTupleForTheRow(row);
      }
      mergeResultsIntoAggregation(aggregate, row);
    }
    markTheTuple(aggregate);
    return aggregate;
  }
 /** A tuple created by the no-argument factory method must make the UDF return null. */
 @Test
 public void emptyInput() throws Exception {
   final EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
   Assert.assertNull(func.exec(TupleFactory.getInstance().newTuple()));
 }
 /** A null input tuple must make the UDF return null. */
 @Test
 public void nullInput() throws Exception {
   final EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
   Assert.assertNull(func.exec(null));
 }
  /**
   * Checks the output schema of {@code SketchToString}: a null input schema gives a null output
   * schema, and a single BYTEARRAY input field gives an outer "tuple" field wrapping an inner
   * "chararray" field. Each schema object is null-checked before it is dereferenced so that a
   * missing level fails with its assertion message. The resolved schemas are printed at the end.
   */
  @Test
  public void outputSchemaTest() throws IOException {
    EvalFunc<String> udf = new SketchToString();

    Schema inputSchema = new Schema(new Schema.FieldSchema("Sketch", DataType.BYTEARRAY));

    Schema nullOutputSchema = udf.outputSchema(null);
    Assert.assertNull(nullOutputSchema, "Should be null");

    // Outer level.
    Schema outputSchema = udf.outputSchema(inputSchema);
    Assert.assertNotNull(outputSchema, "outputSchema may not be null");
    Schema.FieldSchema outputOuterFs0 = outputSchema.getField(0);
    Assert.assertNotNull(outputOuterFs0, "outputSchema.getField(0) schema may not be null");
    Assert.assertEquals(DataType.findTypeName(outputOuterFs0.type), "tuple");

    // Inner level.
    Schema outputInnerSchema = outputOuterFs0.schema;
    Assert.assertNotNull(outputInnerSchema, "outputSchema.getField(0).schema may not be null");
    Schema.FieldSchema outputInnerFs0 = outputInnerSchema.getField(0);
    Assert.assertNotNull(outputInnerFs0, "innerSchema.getField(0) schema may not be null");
    Assert.assertEquals(DataType.findTypeName(outputInnerFs0.type), "chararray");

    // print schemas
    // @formatter:off
    StringBuilder sb = new StringBuilder();
    sb.append("input schema: ")
        .append(inputSchema)
        .append(LS)
        .append("output schema: ")
        .append(outputSchema)
        .append(LS)
        .append("outputOuterFs: ")
        .append(outputOuterFs0)
        .append(", type: ")
        .append(DataType.findTypeName(outputOuterFs0.type))
        .append(LS)
        .append("outputInnerSchema: ")
        .append(outputInnerSchema)
        .append(LS)
        .append("outputInnerFs0: ")
        .append(outputInnerFs0)
        .append(", type: ")
        .append(DataType.findTypeName(outputInnerFs0.type))
        .append(LS);
    println(sb.toString());
    // @formatter:on
    // end print schemas
  }
 /** Passing an Integer where the fraction belongs must raise IllegalArgumentException. */
 @Test(expectedExceptions = IllegalArgumentException.class)
 public void wrongTypeForFraction() throws Exception {
   final int integerFraction = 1;
   DataByteArray serialized = new DataByteArray(DoublesSketch.builder().build().toByteArray());

   EvalFunc<Double> func = new GetQuantileFromDoublesSketch();
   func.exec(tupleFactory.newTuple(Arrays.asList(serialized, integerFraction)));
 }
 /** Passing a Double where the sketch belongs must raise IllegalArgumentException. */
 @Test(expectedExceptions = IllegalArgumentException.class)
 public void wrongTypeForSketch() throws Exception {
   final double notASketch = 1.0;
   final double fraction = 1.0;

   EvalFunc<Double> func = new GetQuantileFromDoublesSketch();
   func.exec(tupleFactory.newTuple(Arrays.asList(notASketch, fraction)));
 }
 /** A tuple built with {@code newTuple(1)} must be rejected with IllegalArgumentException. */
 @Test(expectedExceptions = IllegalArgumentException.class)
 public void wrongNumberOfInputs() throws Exception {
   final int tupleArgument = 1;
   final EvalFunc<Double> func = new GetQuantileFromDoublesSketch();
   func.exec(tupleFactory.newTuple(tupleArgument));
 }