/**
 * Builds a small test segment on disk: nine CSV rows covering three hourly timestamps
 * ({@code 2014102200..02}) and three hosts, indexed with a long-sum aggregator over the
 * {@code visited} column and persisted into {@code segmentDir}.
 *
 * @param segmentDir directory the incremental index is persisted into
 * @throws Exception if parsing a row, adding it to the index, or persisting fails
 */
private void createTestIndex(File segmentDir) throws Exception {
  List<String> rows =
      Lists.newArrayList(
          "2014102200,host1,10",
          "2014102200,host2,20",
          "2014102200,host3,30",
          "2014102201,host1,10",
          "2014102201,host2,20",
          "2014102201,host3,30",
          "2014102202,host1,10",
          "2014102202,host2,20",
          "2014102202,host3,30");
  StringInputRowParser parser =
      new StringInputRowParser(
          new CSVParseSpec(
              new TimestampSpec("timestamp", "yyyyMMddHH", null),
              new DimensionsSpec(ImmutableList.of("host"), null, null),
              null,
              ImmutableList.of("timestamp", "host", "visited")),
          Charsets.UTF_8.toString());
  AggregatorFactory[] aggregators =
      new AggregatorFactory[] {new LongSumAggregatorFactory("visited_sum", "visited")};
  // try-with-resources guarantees the on-heap index is closed even if parsing or
  // persisting throws (replaces the manual try/finally with a null check).
  try (IncrementalIndex index =
      new OnheapIncrementalIndex(0, QueryGranularity.NONE, aggregators, true, 5000)) {
    for (String line : rows) {
      index.add(parser.parse(line));
    }
    IndexMerger.persist(index, segmentDir, null, new IndexSpec());
  }
}
/**
 * Releases the static fixtures created during setup: closes both indexes and deletes the
 * persisted segment directory. The nested try/finally blocks ensure that an exception
 * thrown while closing one resource does not prevent releasing the others (the original
 * would leak {@code incrementalIndex} and the directory if {@code queryableIndex.close()}
 * threw).
 *
 * @throws Exception if closing an index or deleting the directory fails
 */
@AfterClass
public static void cleanup() throws Exception {
  try {
    queryableIndex.close();
  } finally {
    try {
      incrementalIndex.close();
    } finally {
      FileUtils.deleteDirectory(persistedSegmentDir);
    }
  }
}
@Override public Sequence<Row> processSubqueryResult( GroupByQuery subquery, GroupByQuery query, Sequence<Row> subqueryResult) { final Set<AggregatorFactory> aggs = Sets.newHashSet(); // Nested group-bys work by first running the inner query and then materializing the results in // an incremental // index which the outer query is then run against. To build the incremental index, we use the // fieldNames from // the aggregators for the outer query to define the column names so that the index will match // the query. If // there are multiple types of aggregators in the outer query referencing the same fieldName, we // will try to build // multiple columns of the same name using different aggregator types and will fail. Here, we // permit multiple // aggregators of the same type referencing the same fieldName (and skip creating identical // columns for the // subsequent ones) and return an error if the aggregator types are different. for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) { for (final AggregatorFactory transferAgg : aggregatorFactory.getRequiredColumns()) { if (Iterables.any( aggs, new Predicate<AggregatorFactory>() { @Override public boolean apply(AggregatorFactory agg) { return agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg); } })) { throw new IAE( "Inner aggregator can currently only be referenced by a single type of outer aggregator" + " for '%s'", transferAgg.getName()); } aggs.add(transferAgg); } } // We need the inner incremental index to have all the columns required by the outer query final GroupByQuery innerQuery = new GroupByQuery.Builder(subquery) .setAggregatorSpecs(Lists.newArrayList(aggs)) .setInterval(subquery.getIntervals()) .setPostAggregatorSpecs(Lists.<PostAggregator>newArrayList()) .build(); final GroupByQuery outerQuery = new GroupByQuery.Builder(query) .setLimitSpec(query.getLimitSpec().merge(subquery.getLimitSpec())) .build(); final IncrementalIndex innerQueryResultIndex = 
makeIncrementalIndex( innerQuery.withOverriddenContext( ImmutableMap.<String, Object>of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, true)), subqueryResult); // Outer query might have multiple intervals, but they are expected to be non-overlapping and // sorted which // is ensured by QuerySegmentSpec. // GroupByQueryEngine can only process one interval at a time, so we need to call it once per // interval // and concatenate the results. final IncrementalIndex outerQueryResultIndex = makeIncrementalIndex( outerQuery, Sequences.concat( Sequences.map( Sequences.simple(outerQuery.getIntervals()), new Function<Interval, Sequence<Row>>() { @Override public Sequence<Row> apply(Interval interval) { return process( outerQuery.withQuerySegmentSpec( new MultipleIntervalSegmentSpec(ImmutableList.of(interval))), new IncrementalIndexStorageAdapter(innerQueryResultIndex)); } }))); innerQueryResultIndex.close(); return new ResourceClosingSequence<>( outerQuery.applyLimit(GroupByQueryHelper.postAggregate(query, outerQueryResultIndex)), outerQueryResultIndex); }