Beispiel #1
0
  @BeforeClass
  public static void before() throws Exception {
    final String filePath =
        TestUtils.getFileFromResourceUrl(
            DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
    if (INDEX_DIR.exists()) {
      FileUtils.deleteQuietly(INDEX_DIR);
    }

    final SegmentGeneratorConfig config =
        SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
            new File(filePath), INDEX_DIR, "time_day", TimeUnit.DAYS, "test");

    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    driver.init(config);
    driver.build();

    final Schema schema = AvroUtils.extractSchemaFromAvro(new File(filePath));

    final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
    final org.apache.avro.Schema avroSchema = avroReader.getSchema();
    final String[] columns = new String[avroSchema.getFields().size()];
    int i = 0;
    for (final Field f : avroSchema.getFields()) {
      columns[i] = f.name();
      i++;
    }

    uniqueEntries = new HashMap<String, Set<Object>>();
    for (final String column : columns) {
      uniqueEntries.put(column, new HashSet<Object>());
    }

    while (avroReader.hasNext()) {
      final GenericRecord rec = avroReader.next();
      for (final String column : columns) {
        Object val = rec.get(column);
        if (val instanceof Utf8) {
          val = ((Utf8) val).toString();
        }
        uniqueEntries
            .get(column)
            .add(getAppropriateType(schema.getFieldSpecFor(column).getDataType(), val));
      }
    }
  }
  @Test
  public void testAvroNativeJson() throws IOException {
    AvroNativeFileOutputFormat format = new AvroNativeFileOutputFormat();
    ByteArrayOutputStream sos = new ByteArrayOutputStream();
    format.format(sos, e);
    format.close();
    byte[] bytes = sos.toByteArray();

    ReflectData reflectData = ReflectData.get();
    Schema schema = reflectData.getSchema(EventImpl.class);
    ReflectDatumReader<EventImpl> dr = new ReflectDatumReader<EventImpl>(schema);
    ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
    DataFileStream<EventImpl> dec = new DataFileStream<EventImpl>(bais, dr);

    Event er = dec.next();
    assertEquals(e.getHost(), er.getHost());
    assertEquals(e.getNanos(), er.getNanos());
    assertEquals(e.getPriority(), er.getPriority());
    assertTrue(Arrays.equals(e.getBody(), er.getBody()));
  }
Beispiel #3
0
  public void generateSimpleAggregationOnSingleColumnFilters() throws IOException {
    final Map<String, Map<Object, Integer>> cardinalityCountsMap =
        new HashMap<String, Map<Object, Integer>>();
    final Map<String, Map<Object, Map<String, Double>>> sumMap =
        new HashMap<String, Map<Object, Map<String, Double>>>();
    // here string key is columnName:columnValue:MetricName:GroupColumnName:groupKey:metricValue

    final Map<String, Map<Object, Double>> sumGroupBy = new HashMap<String, Map<Object, Double>>();

    aggregationQueries = new ArrayList<AvroQueryGenerator.TestSimpleAggreationQuery>();
    groupByQueries = new ArrayList<AvroQueryGenerator.TestGroupByAggreationQuery>();
    for (final Field f : schema.getFields()) {
      final String fieldName = f.name();
      if (dimensions.contains(fieldName) || metrics.contains(fieldName) || time.equals(fieldName)) {
        isSingleValueMap.put(fieldName, isSingleValueField(f));
        dataTypeMap.put(fieldName, getColumnType(f));
        if (!metrics.contains(fieldName)) {
          cardinalityCountsMap.put(fieldName, new HashMap<Object, Integer>());
        }
      }
    }

    for (final String column : cardinalityCountsMap.keySet()) {
      sumMap.put(column, new HashMap<Object, Map<String, Double>>());
    }

    // here string key is columnName:columnValue:MetricName:GroupColumnName:groupKey:metricValue

    while (dataStream.hasNext()) {
      final GenericRecord record = dataStream.next();

      for (final String column : cardinalityCountsMap.keySet()) {
        Object value = record.get(column);

        if (value == null) {
          switch (schema.getField(column).schema().getType()) {
            case INT:
              value = 0;
              break;
            case FLOAT:
              value = 0F;
              break;
            case LONG:
              value = 0L;
              break;
            case DOUBLE:
              value = 0D;
              break;
            case STRING:
            case BOOLEAN:
              value = "null";
              break;
          }
        }

        if (value instanceof Utf8) {
          value = ((Utf8) value).toString();
        }

        if (value instanceof Array) {
          continue;
        }

        // here string key is columnName:columnValue:MetricName:GroupColumnName:groupKey:metricValue

        for (final String metricName : metrics) {
          final String groupbyKeyBase = column + ":" + record.get(column) + ":" + metricName;
          int dimCounter = 1;
          for (final String dim : cardinalityCountsMap.keySet()) {
            if (!dim.equals(column)) {
              dimCounter++;
              final String groupbyKey = groupbyKeyBase + ":" + dim;
              if (sumGroupBy.containsKey(groupbyKey)) {
                if (sumGroupBy.get(groupbyKey).containsKey(record.get(dim))) {
                  sumGroupBy
                      .get(groupbyKey)
                      .put(
                          record.get(dim),
                          getAppropriateNumberType(
                              metricName,
                              record.get(metricName),
                              sumGroupBy.get(groupbyKey).get(record.get(dim))));
                } else {
                  sumGroupBy
                      .get(groupbyKey)
                      .put(record.get(dim), Double.parseDouble(record.get(metricName).toString()));
                }
              } else {
                sumGroupBy.put(groupbyKey, new HashMap<Object, Double>());
                sumGroupBy
                    .get(groupbyKey)
                    .put(record.get(dim), Double.parseDouble(record.get(metricName).toString()));
              }
            }
            if (dimCounter == 4) {
              break;
            }
          }
        }

        if (cardinalityCountsMap.get(column).containsKey(value)) {
          cardinalityCountsMap
              .get(column)
              .put(value, cardinalityCountsMap.get(column).get(value) + 1);
        } else {
          cardinalityCountsMap.get(column).put(value, 1);
        }

        if (!sumMap.get(column).containsKey(value)) {
          sumMap.get(column).put(value, new HashMap<String, Double>());
        }

        for (final String metric : metrics) {
          if (!sumMap.get(column).get(value).containsKey(metric)) {
            sumMap
                .get(column)
                .get(value)
                .put(metric, getAppropriateNumberType(metric, record.get(metric), 0D));
          } else {
            sumMap
                .get(column)
                .get(value)
                .put(
                    metric,
                    getAppropriateNumberType(
                        metric, record.get(metric), sumMap.get(column).get(value).get(metric)));
          }
        }
        // here string key is columnName:columnValue:MetricName:GroupColumnName:groupKey:metricValue
      }
    }

    dataStream.close();

    if (!isRealtimeSegment) {
      for (final String column : cardinalityCountsMap.keySet()) {
        for (final Object entry : cardinalityCountsMap.get(column).keySet()) {
          final StringBuilder bld = new StringBuilder();
          bld.append("select count(*) from ");
          bld.append(resourceName);
          bld.append(" where ");
          bld.append(column);
          bld.append("=");
          bld.append("'");
          bld.append(entry);
          bld.append("'");
          bld.append(" ");
          bld.append("limit 0");
          String queryString = bld.toString();
          if (!queryString.contains("null")) {
            aggregationQueries.add(
                new TestSimpleAggreationQuery(
                    queryString, new Double(cardinalityCountsMap.get(column).get(entry))));
          }
        }
      }
    }

    for (final String column : sumMap.keySet()) {
      for (final Object value : sumMap.get(column).keySet()) {
        for (final String metric : sumMap.get(column).get(value).keySet()) {
          final StringBuilder bld = new StringBuilder();
          bld.append("select sum('" + metric + "') from ");
          bld.append(resourceName);
          bld.append(" where ");
          bld.append(column);
          bld.append("=");
          bld.append("'");
          bld.append(value);
          bld.append("'");
          bld.append(" ");
          bld.append("limit 0");
          String queryString = bld.toString();
          if (!queryString.contains("null")) {
            aggregationQueries.add(
                new TestSimpleAggreationQuery(
                    bld.toString(), sumMap.get(column).get(value).get(metric)));
          }
        }
      }
    }

    for (final String groupKey : sumGroupBy.keySet()) {
      final String columnName = groupKey.split(":")[0];
      final String columnValue = groupKey.split(":")[1];
      final String metricColumn = groupKey.split(":")[2];
      final String groupByColumnName = groupKey.split(":")[3];

      final StringBuilder bld = new StringBuilder();
      bld.append("select sum('" + metricColumn + "') from ");
      bld.append(resourceName);
      bld.append(" where ");
      bld.append(columnName);
      bld.append("=");
      bld.append("'");
      bld.append(columnValue);
      bld.append("'");
      bld.append(" ");
      bld.append(" group by ");
      bld.append(groupByColumnName);
      bld.append(" top 10 ");
      bld.append("limit 0");
      String queryString = bld.toString();
      if (!queryString.contains("null")) {
        groupByQueries.add(
            new TestGroupByAggreationQuery(bld.toString(), sumGroupBy.get(groupKey)));
      }
    }
  }
Beispiel #4
0
 public void init() throws FileNotFoundException, IOException {
   dataStream =
       new DataFileStream<GenericRecord>(
           new FileInputStream(avroFile), new GenericDatumReader<GenericRecord>());
   schema = dataStream.getSchema();
 }