Example #1
  @Test
  public void testPersistMergeCaseInsensitive() throws Exception {
    final long timestamp = System.currentTimeMillis();
    IncrementalIndex toPersist1 = IncrementalIndexTest.createCaseInsensitiveIndex(timestamp);

    IncrementalIndex toPersist2 =
        new IncrementalIndex(0L, QueryGranularity.NONE, new AggregatorFactory[] {});

    toPersist2.add(
        new MapBasedInputRow(
            timestamp,
            Arrays.asList("DIm1", "DIM2"),
            ImmutableMap.<String, Object>of(
                "dim1", "1", "dim2", "2", "DIm1", "10000", "DIM2", "100000000")));

    toPersist2.add(
        new MapBasedInputRow(
            timestamp,
            Arrays.asList("dIM1", "dIm2"),
            ImmutableMap.<String, Object>of("DIm1", "1", "DIM2", "2", "dim1", "5", "dim2", "6")));

    final File tempDir1 = Files.createTempDir();
    final File tempDir2 = Files.createTempDir();
    final File mergedDir = Files.createTempDir();
    try {
      QueryableIndex index1 = IndexIO.loadIndex(IndexMerger.persist(toPersist1, tempDir1));

      Assert.assertEquals(2, index1.getTimeColumn().getLength());
      Assert.assertEquals(
          Arrays.asList("dim1", "dim2"), Lists.newArrayList(index1.getAvailableDimensions()));
      Assert.assertEquals(2, index1.getColumnNames().size());

      QueryableIndex index2 = IndexIO.loadIndex(IndexMerger.persist(toPersist2, tempDir2));

      Assert.assertEquals(2, index2.getTimeColumn().getLength());
      Assert.assertEquals(
          Arrays.asList("dim1", "dim2"), Lists.newArrayList(index2.getAvailableDimensions()));
      Assert.assertEquals(2, index2.getColumnNames().size());

      QueryableIndex merged =
          IndexIO.loadIndex(
              IndexMerger.mergeQueryableIndex(
                  Arrays.asList(index1, index2), new AggregatorFactory[] {}, mergedDir));

      Assert.assertEquals(3, merged.getTimeColumn().getLength());
      Assert.assertEquals(
          Arrays.asList("dim1", "dim2"), Lists.newArrayList(merged.getAvailableDimensions()));
      Assert.assertEquals(2, merged.getColumnNames().size());
    } finally {
      FileUtils.deleteQuietly(tempDir1);
      FileUtils.deleteQuietly(tempDir2);
      FileUtils.deleteQuietly(mergedDir);
    }
  }
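
The helper IncrementalIndexTest.createCaseInsensitiveIndex(timestamp) used above is not shown here. A minimal sketch of what it presumably builds, assuming two rows whose dimension names vary only in case (the real helper may differ):

  public static IncrementalIndex createCaseInsensitiveIndex(long timestamp) throws Exception {
    IncrementalIndex index =
        new IncrementalIndex(0L, QueryGranularity.NONE, new AggregatorFactory[] {});

    // Two rows with mixed-case dimension names; the persisted segment is expected
    // to expose them under the lower-cased names dim1 and dim2.
    index.add(
        new MapBasedInputRow(
            timestamp,
            Arrays.asList("Dim1", "DiM2"),
            ImmutableMap.<String, Object>of("Dim1", "1", "DiM2", "2")));
    index.add(
        new MapBasedInputRow(
            timestamp,
            Arrays.asList("dIM1", "dIm2"),
            ImmutableMap.<String, Object>of("dIM1", "3", "dIm2", "4")));

    return index;
  }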
Example #2
 private static IncrementalIndex buildIncrementalIndexWithRows(
     IncrementalIndexSchema schema, int maxRows, Iterable<InputRow> rows) {
   Preconditions.checkNotNull(schema, "schema");
   final IncrementalIndex incrementalIndex = new OnheapIncrementalIndex(schema, true, maxRows);
   for (InputRow row : rows) {
     try {
       incrementalIndex.add(row);
     } catch (IndexSizeExceededException e) {
       // maxRows should be sized to hold all input rows, so hitting the limit is a bug; rethrow unchecked.
       throw Throwables.propagate(e);
     }
   }
   return incrementalIndex;
 }
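
A hypothetical call site for the builder above, assuming the standard IncrementalIndexSchema builder API; the schema and rows are illustrative only:

   IncrementalIndexSchema schema =
       new IncrementalIndexSchema.Builder()
           .withMinTimestamp(0L)
           .withQueryGranularity(QueryGranularity.NONE)
           .withMetrics(new AggregatorFactory[] {new CountAggregatorFactory("count")})
           .build();

   IncrementalIndex index =
       buildIncrementalIndexWithRows(
           schema,
           1000,
           ImmutableList.<InputRow>of(
               new MapBasedInputRow(
                   System.currentTimeMillis(),
                   ImmutableList.of("dim1"),
                   ImmutableMap.<String, Object>of("dim1", "a"))));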
Example #3
  @Test
  public void testPersistEmptyColumn() throws Exception {
    final IncrementalIndex toPersist1 =
        new IncrementalIndex(0L, QueryGranularity.NONE, new AggregatorFactory[] {});
    final IncrementalIndex toPersist2 =
        new IncrementalIndex(0L, QueryGranularity.NONE, new AggregatorFactory[] {});
    final File tmpDir1 = Files.createTempDir();
    final File tmpDir2 = Files.createTempDir();
    final File tmpDir3 = Files.createTempDir();

    try {
      toPersist1.add(
          new MapBasedInputRow(
              1L,
              ImmutableList.of("dim1", "dim2"),
              ImmutableMap.<String, Object>of("dim1", ImmutableList.of(), "dim2", "foo")));

      toPersist2.add(
          new MapBasedInputRow(
              1L,
              ImmutableList.of("dim1", "dim2"),
              ImmutableMap.<String, Object>of("dim1", ImmutableList.of(), "dim2", "bar")));

      final QueryableIndex index1 = IndexIO.loadIndex(IndexMerger.persist(toPersist1, tmpDir1));
      final QueryableIndex index2 = IndexIO.loadIndex(IndexMerger.persist(toPersist2, tmpDir2));
      final QueryableIndex merged =
          IndexIO.loadIndex(
              IndexMerger.mergeQueryableIndex(
                  Arrays.asList(index1, index2), new AggregatorFactory[] {}, tmpDir3));

      Assert.assertEquals(1, index1.getTimeColumn().getLength());
      Assert.assertEquals(
          ImmutableList.of("dim2"), ImmutableList.copyOf(index1.getAvailableDimensions()));

      Assert.assertEquals(1, index2.getTimeColumn().getLength());
      Assert.assertEquals(
          ImmutableList.of("dim2"), ImmutableList.copyOf(index2.getAvailableDimensions()));

      Assert.assertEquals(1, merged.getTimeColumn().getLength());
      Assert.assertEquals(
          ImmutableList.of("dim2"), ImmutableList.copyOf(merged.getAvailableDimensions()));
    } finally {
      FileUtils.deleteQuietly(tmpDir1);
      FileUtils.deleteQuietly(tmpDir2);
      FileUtils.deleteQuietly(tmpDir3);
    }
  }
  private void createTestIndex(File segmentDir) throws Exception {
    List<String> rows =
        Lists.newArrayList(
            "2014102200,host1,10",
            "2014102200,host2,20",
            "2014102200,host3,30",
            "2014102201,host1,10",
            "2014102201,host2,20",
            "2014102201,host3,30",
            "2014102202,host1,10",
            "2014102202,host2,20",
            "2014102202,host3,30");

    StringInputRowParser parser =
        new StringInputRowParser(
            new CSVParseSpec(
                new TimestampSpec("timestamp", "yyyyMMddHH", null),
                new DimensionsSpec(ImmutableList.of("host"), null, null),
                null,
                ImmutableList.of("timestamp", "host", "visited")),
            Charsets.UTF_8.toString());

    AggregatorFactory[] aggregators =
        new AggregatorFactory[] {new LongSumAggregatorFactory("visited_sum", "visited")};

    IncrementalIndex index = null;
    try {
      index = new OnheapIncrementalIndex(0, QueryGranularity.NONE, aggregators, true, 5000);
      for (String line : rows) {
        index.add(parser.parse(line));
      }
      IndexMerger.persist(index, segmentDir, null, new IndexSpec());
    } finally {
      if (index != null) {
        index.close();
      }
    }
  }
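
A hypothetical caller of createTestIndex, loading the persisted segment back for querying; the cleanup mirrors the temp-dir handling used in the tests above:

    File segmentDir = Files.createTempDir();
    try {
      createTestIndex(segmentDir);
      QueryableIndex index = IndexIO.loadIndex(segmentDir);
      // ... run queries against the loaded segment ...
      index.close();
    } finally {
      FileUtils.deleteQuietly(segmentDir);
    }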
  @BeforeClass
  public static void setupClass() throws Exception {
    incrementalIndex =
        new OnheapIncrementalIndex(
            0,
            QueryGranularity.NONE,
            new AggregatorFactory[] {new CountAggregatorFactory("count")},
            true,
            5000);

    StringInputRowParser parser =
        new StringInputRowParser(
            new CSVParseSpec(
                new TimestampSpec("timestamp", "iso", null),
                new DimensionsSpec(ImmutableList.of("product", "tags"), null, null),
                "\t",
                ImmutableList.of("timestamp", "product", "tags")),
            "UTF-8");

    String[] rows =
        new String[] {
          "2011-01-12T00:00:00.000Z,product_1,t1\tt2\tt3",
          "2011-01-13T00:00:00.000Z,product_2,t3\tt4\tt5",
          "2011-01-14T00:00:00.000Z,product_3,t5\tt6\tt7",
        };

    for (String row : rows) {
      incrementalIndex.add(parser.parse(row));
    }

    persistedSegmentDir = Files.createTempDir();
    TestHelper.getTestIndexMerger()
        .persist(
            incrementalIndex,
            persistedSegmentDir,
            ImmutableMap.<String, Object>of(),
            new IndexSpec());

    queryableIndex = TestHelper.getTestIndexIO().loadIndex(persistedSegmentDir);
  }
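
The "\t" listDelimiter in the CSVParseSpec above makes tags a multi-value dimension. An illustrative check of the assumed parsing behavior (not part of the original class):

    InputRow first = parser.parse("2011-01-12T00:00:00.000Z,product_1,t1\tt2\tt3");
    // Expected: first.getDimension("tags")    -> ["t1", "t2", "t3"]
    //           first.getDimension("product") -> ["product_1"]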
  @Setup
  public void setup() throws IOException {
    log.info("SETUP CALLED AT " + System.currentTimeMillis());

    // Complex-metric serdes are registered globally, so only register hyperUnique once per JVM.
    if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
      ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(Hashing.murmur3_128()));
    }

    executorService = Execs.multiThreaded(numSegments, "TimeseriesThreadPool");

    setupQueries();

    String[] schemaQuery = schemaAndQuery.split("\\.");
    String schemaName = schemaQuery[0];
    String queryName = schemaQuery[1];

    schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get(schemaName);
    query = SCHEMA_QUERY_MAP.get(schemaName).get(queryName);

    incIndexes = new ArrayList<>();
    for (int i = 0; i < numSegments; i++) {
      log.info("Generating rows for segment " + i);
      BenchmarkDataGenerator gen =
          new BenchmarkDataGenerator(
              schemaInfo.getColumnSchemas(),
              RNG_SEED + i,
              schemaInfo.getDataInterval(),
              rowsPerSegment);

      IncrementalIndex incIndex = makeIncIndex();

      for (int j = 0; j < rowsPerSegment; j++) {
        InputRow row = gen.nextRow();
        if (j % 10000 == 0) {
          log.info(j + " rows generated.");
        }
        incIndex.add(row);
      }
      log.info(rowsPerSegment + " rows generated");
      incIndexes.add(incIndex);
    }

    File tmpFile = Files.createTempDir();
    log.info("Using temp dir: " + tmpFile.getAbsolutePath());
    tmpFile.deleteOnExit();

    qIndexes = new ArrayList<>();
    for (int i = 0; i < numSegments; i++) {
      File indexFile = INDEX_MERGER_V9.persist(incIndexes.get(i), tmpFile, new IndexSpec());

      QueryableIndex qIndex = INDEX_IO.loadIndex(indexFile);
      qIndexes.add(qIndex);
    }

    factory =
        new TimeseriesQueryRunnerFactory(
            new TimeseriesQueryQueryToolChest(
                QueryBenchmarkUtil.NoopIntervalChunkingQueryRunnerDecorator()),
            new TimeseriesQueryEngine(),
            QueryBenchmarkUtil.NOOP_QUERYWATCHER);
  }
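
A minimal sketch of a benchmark method exercising the factory built in setup(), assuming the QueryBenchmarkUtil.makeQueryRunner helper used alongside these benchmarks; the segment id is illustrative:

  @Benchmark
  public void querySingleQueryableIndex(Blackhole blackhole) {
    // Wrap the first persisted segment and run the timeseries query against it.
    QueryRunner<Result<TimeseriesResultValue>> runner =
        QueryBenchmarkUtil.makeQueryRunner(
            factory, "qIndex_0", new QueryableIndexSegment("qIndex_0", qIndexes.get(0)));

    Sequence<Result<TimeseriesResultValue>> results =
        runner.run(query, Maps.<String, Object>newHashMap());
    for (Result<TimeseriesResultValue> result :
        Sequences.toList(results, Lists.<Result<TimeseriesResultValue>>newArrayList())) {
      blackhole.consume(result);
    }
  }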
 @AfterClass
 public static void cleanup() throws Exception {
   queryableIndex.close();
   incrementalIndex.close();
   FileUtils.deleteDirectory(persistedSegmentDir);
 }
  @Override
  public Sequence<Row> processSubqueryResult(
      GroupByQuery subquery, GroupByQuery query, Sequence<Row> subqueryResult) {
    final Set<AggregatorFactory> aggs = Sets.newHashSet();

    // Nested group-bys work by first running the inner query and then materializing the results
    // in an incremental index which the outer query is then run against. To build the incremental
    // index, we use the fieldNames from the aggregators for the outer query to define the column
    // names so that the index will match the query. If there are multiple types of aggregators in
    // the outer query referencing the same fieldName, we will try to build multiple columns of the
    // same name using different aggregator types and will fail. Here, we permit multiple
    // aggregators of the same type referencing the same fieldName (and skip creating identical
    // columns for the subsequent ones) and return an error if the aggregator types are different.
    for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
      for (final AggregatorFactory transferAgg : aggregatorFactory.getRequiredColumns()) {
        if (Iterables.any(
            aggs,
            new Predicate<AggregatorFactory>() {
              @Override
              public boolean apply(AggregatorFactory agg) {
                return agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg);
              }
            })) {
          throw new IAE(
              "Inner aggregator can currently only be referenced by a single type of outer aggregator"
                  + " for '%s'",
              transferAgg.getName());
        }

        aggs.add(transferAgg);
      }
    }

    // We need the inner incremental index to have all the columns required by the outer query
    final GroupByQuery innerQuery =
        new GroupByQuery.Builder(subquery)
            .setAggregatorSpecs(Lists.newArrayList(aggs))
            .setInterval(subquery.getIntervals())
            .setPostAggregatorSpecs(Lists.<PostAggregator>newArrayList())
            .build();

    final GroupByQuery outerQuery =
        new GroupByQuery.Builder(query)
            .setLimitSpec(query.getLimitSpec().merge(subquery.getLimitSpec()))
            .build();

    final IncrementalIndex innerQueryResultIndex =
        makeIncrementalIndex(
            innerQuery.withOverriddenContext(
                ImmutableMap.<String, Object>of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, true)),
            subqueryResult);

    // Outer query might have multiple intervals, but they are expected to be non-overlapping and
    // sorted, which is ensured by QuerySegmentSpec. GroupByQueryEngine can only process one
    // interval at a time, so we need to call it once per interval and concatenate the results.
    final IncrementalIndex outerQueryResultIndex =
        makeIncrementalIndex(
            outerQuery,
            Sequences.concat(
                Sequences.map(
                    Sequences.simple(outerQuery.getIntervals()),
                    new Function<Interval, Sequence<Row>>() {
                      @Override
                      public Sequence<Row> apply(Interval interval) {
                        return process(
                            outerQuery.withQuerySegmentSpec(
                                new MultipleIntervalSegmentSpec(ImmutableList.of(interval))),
                            new IncrementalIndexStorageAdapter(innerQueryResultIndex));
                      }
                    })));

    innerQueryResultIndex.close();

    return new ResourceClosingSequence<>(
        outerQuery.applyLimit(GroupByQueryHelper.postAggregate(query, outerQueryResultIndex)),
        outerQueryResultIndex);
  }
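
For illustration, a hypothetical pair of outer aggregators that would trip the IAE above: both require the inner column innerMetric, but as different aggregator types, so the incremental index cannot build a single column that satisfies both (names are illustrative):

    // LongSum and DoubleSum both require "innerMetric" but with different types,
    // so processSubqueryResult would reject this outer query.
    List<AggregatorFactory> conflictingOuterAggs =
        ImmutableList.<AggregatorFactory>of(
            new LongSumAggregatorFactory("sumA", "innerMetric"),
            new DoubleSumAggregatorFactory("sumB", "innerMetric"));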