// Dimension names that differ only in case should resolve to the same column at persist and merge time.
@Test
public void testPersistMergeCaseInsensitive() throws Exception
{
  final long timestamp = System.currentTimeMillis();
  IncrementalIndex toPersist1 = IncrementalIndexTest.createCaseInsensitiveIndex(timestamp);

  IncrementalIndex toPersist2 = new IncrementalIndex(0L, QueryGranularity.NONE, new AggregatorFactory[]{});
  toPersist2.add(
      new MapBasedInputRow(
          timestamp,
          Arrays.asList("DIm1", "DIM2"),
          ImmutableMap.<String, Object>of("dim1", "1", "dim2", "2", "DIm1", "10000", "DIM2", "100000000")
      )
  );
  toPersist2.add(
      new MapBasedInputRow(
          timestamp,
          Arrays.asList("dIM1", "dIm2"),
          ImmutableMap.<String, Object>of("DIm1", "1", "DIM2", "2", "dim1", "5", "dim2", "6")
      )
  );

  final File tempDir1 = Files.createTempDir();
  final File tempDir2 = Files.createTempDir();
  final File mergedDir = Files.createTempDir();
  try {
    QueryableIndex index1 = IndexIO.loadIndex(IndexMerger.persist(toPersist1, tempDir1));

    Assert.assertEquals(2, index1.getTimeColumn().getLength());
    Assert.assertEquals(Arrays.asList("dim1", "dim2"), Lists.newArrayList(index1.getAvailableDimensions()));
    Assert.assertEquals(2, index1.getColumnNames().size());

    QueryableIndex index2 = IndexIO.loadIndex(IndexMerger.persist(toPersist2, tempDir2));

    Assert.assertEquals(2, index2.getTimeColumn().getLength());
    Assert.assertEquals(Arrays.asList("dim1", "dim2"), Lists.newArrayList(index2.getAvailableDimensions()));
    Assert.assertEquals(2, index2.getColumnNames().size());

    QueryableIndex merged = IndexIO.loadIndex(
        IndexMerger.mergeQueryableIndex(Arrays.asList(index1, index2), new AggregatorFactory[]{}, mergedDir)
    );

    Assert.assertEquals(3, merged.getTimeColumn().getLength());
    Assert.assertEquals(Arrays.asList("dim1", "dim2"), Lists.newArrayList(merged.getAvailableDimensions()));
    Assert.assertEquals(2, merged.getColumnNames().size());
  }
  finally {
    FileUtils.deleteQuietly(tempDir1);
    FileUtils.deleteQuietly(tempDir2);
    FileUtils.deleteQuietly(mergedDir);
  }
}
private static IncrementalIndex buildIncrementalIndexWithRows(
    IncrementalIndexSchema schema,
    int maxRows,
    Iterable<InputRow> rows
)
{
  Preconditions.checkNotNull(schema, "schema");
  final IncrementalIndex incrementalIndex = new OnheapIncrementalIndex(schema, true, maxRows);
  for (InputRow row : rows) {
    try {
      incrementalIndex.add(row);
    }
    catch (IndexSizeExceededException e) {
      throw Throwables.propagate(e);
    }
  }
  return incrementalIndex;
}
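// Usage sketch (illustrative, not part of the original tests): building a small two-row index with
// the helper above. IncrementalIndexSchema.Builder and its with* methods are assumed to match the
// Druid version these tests target.
private static IncrementalIndex buildExampleIndex()
{
  final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(0L)
      .withQueryGranularity(QueryGranularity.NONE)
      .withMetrics(new AggregatorFactory[]{new CountAggregatorFactory("count")})
      .build();
  final List<InputRow> rows = Arrays.<InputRow>asList(
      new MapBasedInputRow(1L, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "a")),
      new MapBasedInputRow(2L, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "b"))
  );
  return buildIncrementalIndexWithRows(schema, 1000, rows);
}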
@Test
public void testPersistEmptyColumn() throws Exception
{
  final IncrementalIndex toPersist1 = new IncrementalIndex(0L, QueryGranularity.NONE, new AggregatorFactory[]{});
  final IncrementalIndex toPersist2 = new IncrementalIndex(0L, QueryGranularity.NONE, new AggregatorFactory[]{});
  final File tmpDir1 = Files.createTempDir();
  final File tmpDir2 = Files.createTempDir();
  final File tmpDir3 = Files.createTempDir();

  try {
    toPersist1.add(
        new MapBasedInputRow(
            1L,
            ImmutableList.of("dim1", "dim2"),
            ImmutableMap.<String, Object>of("dim1", ImmutableList.of(), "dim2", "foo")
        )
    );
    toPersist2.add(
        new MapBasedInputRow(
            1L,
            ImmutableList.of("dim1", "dim2"),
            ImmutableMap.<String, Object>of("dim1", ImmutableList.of(), "dim2", "bar")
        )
    );

    final QueryableIndex index1 = IndexIO.loadIndex(IndexMerger.persist(toPersist1, tmpDir1));
    final QueryableIndex index2 = IndexIO.loadIndex(IndexMerger.persist(toPersist2, tmpDir2));
    final QueryableIndex merged = IndexIO.loadIndex(
        IndexMerger.mergeQueryableIndex(Arrays.asList(index1, index2), new AggregatorFactory[]{}, tmpDir3)
    );

    // "dim1" is empty in every row, so it should be dropped from the persisted segments.
    Assert.assertEquals(1, index1.getTimeColumn().getLength());
    Assert.assertEquals(ImmutableList.of("dim2"), ImmutableList.copyOf(index1.getAvailableDimensions()));

    Assert.assertEquals(1, index2.getTimeColumn().getLength());
    Assert.assertEquals(ImmutableList.of("dim2"), ImmutableList.copyOf(index2.getAvailableDimensions()));

    // The merged segment keeps both rows: same timestamp, but different "dim2" values.
    Assert.assertEquals(2, merged.getTimeColumn().getLength());
    Assert.assertEquals(ImmutableList.of("dim2"), ImmutableList.copyOf(merged.getAvailableDimensions()));
  }
  finally {
    FileUtils.deleteQuietly(tmpDir1);
    FileUtils.deleteQuietly(tmpDir2);
    FileUtils.deleteQuietly(tmpDir3);
  }
}
private void createTestIndex(File segmentDir) throws Exception
{
  final List<String> rows = Lists.newArrayList(
      "2014102200,host1,10",
      "2014102200,host2,20",
      "2014102200,host3,30",
      "2014102201,host1,10",
      "2014102201,host2,20",
      "2014102201,host3,30",
      "2014102202,host1,10",
      "2014102202,host2,20",
      "2014102202,host3,30"
  );

  StringInputRowParser parser = new StringInputRowParser(
      new CSVParseSpec(
          new TimestampSpec("timestamp", "yyyyMMddHH", null),
          new DimensionsSpec(ImmutableList.of("host"), null, null),
          null,
          ImmutableList.of("timestamp", "host", "visited")
      ),
      Charsets.UTF_8.toString()
  );

  AggregatorFactory[] aggregators = new AggregatorFactory[]{
      new LongSumAggregatorFactory("visited_sum", "visited")
  };

  IncrementalIndex index = null;
  try {
    index = new OnheapIncrementalIndex(0, QueryGranularity.NONE, aggregators, true, 5000);
    for (String line : rows) {
      index.add(parser.parse(line));
    }
    IndexMerger.persist(index, segmentDir, null, new IndexSpec());
  }
  finally {
    if (index != null) {
      index.close();
    }
  }
}
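// Usage sketch (illustrative, not part of the original suite): persists the test rows and loads
// them back, reusing the Files.createTempDir, IndexIO.loadIndex, and FileUtils.deleteQuietly calls
// that appear elsewhere in these tests.
@Test
public void testCreateTestIndexRoundTrip() throws Exception
{
  final File segmentDir = Files.createTempDir();
  try {
    createTestIndex(segmentDir);
    final QueryableIndex index = IndexIO.loadIndex(segmentDir);
    // Only "host" was declared as a dimension, so it should be the lone dimension after the round trip.
    Assert.assertEquals(ImmutableList.of("host"), ImmutableList.copyOf(index.getAvailableDimensions()));
  }
  finally {
    FileUtils.deleteQuietly(segmentDir);
  }
}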
@BeforeClass
public static void setupClass() throws Exception
{
  incrementalIndex = new OnheapIncrementalIndex(
      0,
      QueryGranularity.NONE,
      new AggregatorFactory[]{new CountAggregatorFactory("count")},
      true,
      5000
  );

  // The tab list delimiter makes "tags" a multi-value dimension (e.g. t1, t2, t3 in the first row).
  StringInputRowParser parser = new StringInputRowParser(
      new CSVParseSpec(
          new TimestampSpec("timestamp", "iso", null),
          new DimensionsSpec(ImmutableList.of("product", "tags"), null, null),
          "\t",
          ImmutableList.of("timestamp", "product", "tags")
      ),
      "UTF-8"
  );

  String[] rows = new String[]{
      "2011-01-12T00:00:00.000Z,product_1,t1\tt2\tt3",
      "2011-01-13T00:00:00.000Z,product_2,t3\tt4\tt5",
      "2011-01-14T00:00:00.000Z,product_3,t5\tt6\tt7"
  };

  for (String row : rows) {
    incrementalIndex.add(parser.parse(row));
  }

  persistedSegmentDir = Files.createTempDir();
  TestHelper.getTestIndexMerger()
            .persist(incrementalIndex, persistedSegmentDir, ImmutableMap.<String, Object>of(), new IndexSpec());

  queryableIndex = TestHelper.getTestIndexIO().loadIndex(persistedSegmentDir);
}
@Setup
public void setup() throws IOException
{
  log.info("SETUP CALLED AT " + System.currentTimeMillis());

  if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(Hashing.murmur3_128()));
  }

  executorService = Execs.multiThreaded(numSegments, "TimeseriesThreadPool");

  setupQueries();

  String[] schemaQuery = schemaAndQuery.split("\\.");
  String schemaName = schemaQuery[0];
  String queryName = schemaQuery[1];

  schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get(schemaName);
  query = SCHEMA_QUERY_MAP.get(schemaName).get(queryName);

  incIndexes = new ArrayList<>();
  for (int i = 0; i < numSegments; i++) {
    log.info("Generating rows for segment " + i);

    BenchmarkDataGenerator gen = new BenchmarkDataGenerator(
        schemaInfo.getColumnSchemas(),
        RNG_SEED + i,
        schemaInfo.getDataInterval(),
        rowsPerSegment
    );

    IncrementalIndex incIndex = makeIncIndex();

    for (int j = 0; j < rowsPerSegment; j++) {
      InputRow row = gen.nextRow();
      if (j % 10000 == 0) {
        log.info(j + " rows generated.");
      }
      incIndex.add(row);
    }
    log.info(rowsPerSegment + " rows generated");
    incIndexes.add(incIndex);
  }

  File tmpDir = Files.createTempDir();
  log.info("Using temp dir: " + tmpDir.getAbsolutePath());
  tmpDir.deleteOnExit();

  qIndexes = new ArrayList<>();
  for (int i = 0; i < numSegments; i++) {
    File indexFile = INDEX_MERGER_V9.persist(incIndexes.get(i), tmpDir, new IndexSpec());
    QueryableIndex qIndex = INDEX_IO.loadIndex(indexFile);
    qIndexes.add(qIndex);
  }

  factory = new TimeseriesQueryRunnerFactory(
      new TimeseriesQueryQueryToolChest(QueryBenchmarkUtil.NoopIntervalChunkingQueryRunnerDecorator()),
      new TimeseriesQueryEngine(),
      QueryBenchmarkUtil.NOOP_QUERYWATCHER
  );
}
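// Sketch of a benchmark method that could consume this setup (illustrative; the exact
// QueryBenchmarkUtil.makeQueryRunner signature and the JMH annotations are assumptions based on
// the benchmark utilities referenced in setup()).
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void querySingleQueryableIndex(Blackhole blackhole) throws Exception
{
  final QueryRunner<Result<TimeseriesResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(
      factory,
      "qIndex",
      new QueryableIndexSegment("qIndex", qIndexes.get(0))
  );
  final List<Result<TimeseriesResultValue>> results = Sequences.toList(
      runner.run(query, Maps.<String, Object>newHashMap()),
      Lists.<Result<TimeseriesResultValue>>newArrayList()
  );
  for (Result<TimeseriesResultValue> result : results) {
    // Feed each result to the JMH blackhole so the JIT cannot elide the query work.
    blackhole.consume(result);
  }
}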
@AfterClass
public static void cleanup() throws Exception
{
  queryableIndex.close();
  incrementalIndex.close();
  FileUtils.deleteDirectory(persistedSegmentDir);
}
@Override
public Sequence<Row> processSubqueryResult(
    GroupByQuery subquery,
    GroupByQuery query,
    Sequence<Row> subqueryResult
)
{
  final Set<AggregatorFactory> aggs = Sets.newHashSet();

  // Nested group-bys work by first running the inner query and then materializing the results in
  // an incremental index which the outer query is then run against. To build the incremental
  // index, we use the fieldNames from the aggregators for the outer query to define the column
  // names so that the index will match the query. If there are multiple types of aggregators in
  // the outer query referencing the same fieldName, we will try to build multiple columns of the
  // same name using different aggregator types and will fail. Here, we permit multiple
  // aggregators of the same type referencing the same fieldName (and skip creating identical
  // columns for the subsequent ones) and return an error if the aggregator types are different.
  for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
    for (final AggregatorFactory transferAgg : aggregatorFactory.getRequiredColumns()) {
      if (Iterables.any(
          aggs,
          new Predicate<AggregatorFactory>()
          {
            @Override
            public boolean apply(AggregatorFactory agg)
            {
              return agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg);
            }
          }
      )) {
        throw new IAE(
            "Inner aggregator can currently only be referenced by a single type of outer aggregator for '%s'",
            transferAgg.getName()
        );
      }
      aggs.add(transferAgg);
    }
  }

  // We need the inner incremental index to have all the columns required by the outer query.
  final GroupByQuery innerQuery = new GroupByQuery.Builder(subquery)
      .setAggregatorSpecs(Lists.newArrayList(aggs))
      .setInterval(subquery.getIntervals())
      .setPostAggregatorSpecs(Lists.<PostAggregator>newArrayList())
      .build();

  final GroupByQuery outerQuery = new GroupByQuery.Builder(query)
      .setLimitSpec(query.getLimitSpec().merge(subquery.getLimitSpec()))
      .build();

  final IncrementalIndex innerQueryResultIndex = makeIncrementalIndex(
      innerQuery.withOverriddenContext(
          ImmutableMap.<String, Object>of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, true)
      ),
      subqueryResult
  );

  // The outer query might have multiple intervals, but they are expected to be non-overlapping
  // and sorted, which is ensured by QuerySegmentSpec. GroupByQueryEngine can only process one
  // interval at a time, so we need to call it once per interval and concatenate the results.
  final IncrementalIndex outerQueryResultIndex = makeIncrementalIndex(
      outerQuery,
      Sequences.concat(
          Sequences.map(
              Sequences.simple(outerQuery.getIntervals()),
              new Function<Interval, Sequence<Row>>()
              {
                @Override
                public Sequence<Row> apply(Interval interval)
                {
                  return process(
                      outerQuery.withQuerySegmentSpec(
                          new MultipleIntervalSegmentSpec(ImmutableList.of(interval))
                      ),
                      new IncrementalIndexStorageAdapter(innerQueryResultIndex)
                  );
                }
              }
          )
      )
  );

  innerQueryResultIndex.close();

  return new ResourceClosingSequence<>(
      outerQuery.applyLimit(GroupByQueryHelper.postAggregate(query, outerQueryResultIndex)),
      outerQueryResultIndex
  );
}
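// Standalone sketch (illustrative, not from the original source) of the collision the loop above
// rejects: two outer aggregators of different types reading the same inner column "x". The sum
// aggregator types are examples only; for the sum factories, getRequiredColumns() yields a
// required aggregator named after the fieldName.
final Set<AggregatorFactory> seen = Sets.newHashSet();
final List<AggregatorFactory> outerAggs = ImmutableList.<AggregatorFactory>of(
    new LongSumAggregatorFactory("x_long", "x"),
    new DoubleSumAggregatorFactory("x_double", "x") // same fieldName "x", different aggregator type
);
for (AggregatorFactory outerAgg : outerAggs) {
  for (AggregatorFactory required : outerAgg.getRequiredColumns()) {
    for (AggregatorFactory existing : seen) {
      if (existing.getName().equals(required.getName()) && !existing.equals(required)) {
        // Both required columns are named "x" but are not equal, mirroring the IAE thrown above.
        throw new IAE(
            "Inner aggregator can currently only be referenced by a single type of outer aggregator for '%s'",
            required.getName()
        );
      }
    }
    seen.add(required);
  }
}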