Example #1
0
  /**
   * Runs a handful of stream queries over the sales returned by {@code saleStream()} and prints
   * each result to standard out.
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String[] args) {
    // Total number of sales.
    System.out.println("Count of sales: " + saleStream().count());

    // Does any single sale total exceed 100.00?
    boolean hasBigSale = saleStream().mapToDouble(Sale::total).anyMatch(amount -> amount > 100.00);
    System.out.println("Big sale day? " + hasBigSale);

    // Summary statistics over all sale totals; the maximum is also printed on its own line.
    DoubleSummaryStatistics totalStats = saleStream().mapToDouble(Sale::total).summaryStatistics();
    System.out.println("Max sale amount: " + totalStats.getMax());
    System.out.println("Stats on total: " + totalStats);

    // Number of items across every sale. A stream cannot be reused, so each query starts from
    // a fresh saleStream().
    long soldItemCount = saleStream().<Item>flatMap(sale -> sale.items.stream()).count();
    System.out.println("Count of items: " + soldItemCount);

    // Distinct item identities, joined into a single string.
    String distinctIdentities =
        saleStream()
            .<Item>flatMap(sale -> sale.items.stream())
            .map(item -> item.identity)
            .distinct()
            .collect(Collectors.joining(" & "));
    System.out.println("Distinct items: " + distinctIdentities);

    // Summarize sale totals in parallel, keyed by the name of the worker thread that handled
    // each sale (the grouping key is the thread name, not a property of the sale).
    ConcurrentMap<String, DoubleSummaryStatistics> summary =
        saleStream()
            .parallel()
            .collect(
                Collectors.groupingByConcurrent(
                    sale -> Thread.currentThread().getName(),
                    Collectors.summarizingDouble(Sale::total)));
    System.out.println("Summary by thread: " + summary);

    // Print one line per key, in sorted key order.
    summary
        .keySet()
        .stream()
        .sorted()
        .map(threadName -> threadName + " stats: " + summary.get(threadName))
        .forEach(System.out::println);
  }
  /**
   * Groups the given records by their group-by key and aggregates the measure fields per group.
   *
   * <p>Each result record holds the first {@code dimSize} fields copied from the first record
   * seen for its group, followed by one aggregated value per entry of {@code queryMeasures},
   * stored at field index {@code dimSize + i}. Measures whose aggregator equals
   * {@code Aggregator.DISTINCT_COUNT} are accumulated as per-group sets of distinct values and
   * replaced by the set size once grouping is done; every other measure is folded with its
   * aggregator's {@code aggregate} method.
   *
   * <p>Inputs with more than 300000 records are processed with a parallel stream.
   *
   * @param dataList records to aggregate
   * @param dimSize number of leading dimension fields in each record
   * @param queryMeasures measures to aggregate, in the order their values follow the dimensions
   * @return {@code dataList} itself when it or {@code queryMeasures} is empty; otherwise a new
   *     list with one aggregated record per distinct group-by key, in the iteration order of
   *     the underlying concurrent map
   * @throws RuntimeException wrapping any exception raised while aggregating a measure
   */
  public static List<SearchIndexResultRecord> aggregate(
      List<SearchIndexResultRecord> dataList, int dimSize, List<QueryMeasure> queryMeasures) {

    if (CollectionUtils.isEmpty(queryMeasures) || CollectionUtils.isEmpty(dataList)) {
      LOGGER.info("no need to group.");
      return dataList;
    }

    // Positions (within queryMeasures) of the distinct-count measures.
    Set<Integer> countIndex = Sets.newHashSet();
    for (int i = 0; i < queryMeasures.size(); i++) {
      if (queryMeasures.get(i).getAggregator().equals(Aggregator.DISTINCT_COUNT)) {
        if (LOGGER.isDebugEnabled()) {
          // Diagnostic dump of the raw values of this measure. Measure i is stored at field
          // dimSize + i, matching the indexing used by the reducer below.
          final int fieldIndex = dimSize + i;
          LOGGER.debug(queryMeasures.get(i) + " ============= begin print values ===== ====");
          dataList.forEach(rs -> LOGGER.debug(rs.getField(fieldIndex) + ""));
          LOGGER.debug(" ============= end print measure values ==============");
        }
        countIndex.add(i);
      }
    }

    int arraySize = dataList.get(0).getFieldArraySize();

    long current = System.currentTimeMillis();
    Stream<SearchIndexResultRecord> stream =
        dataList.size() > 300000 ? dataList.parallelStream() : dataList.stream();

    // Initial capacity for each distinct-value set: 1% of the input for inputs above 100
    // records, otherwise the input size.
    int defaultSize = (int) (dataList.size() > 100 ? dataList.size() * 0.01 : dataList.size());

    final BinaryOperator<SearchIndexResultRecord> reduceOperation =
        (x, y) -> {
          // The accumulator starts as the shared identity record. When its group key does not
          // match the incoming record, a fresh accumulator is created for that group so the
          // identity is never mutated.
          // NOTE(review): relies on the identity record's group-by value never equalling a real
          // key — confirm against SearchIndexResultRecord.of.
          if (!y.getGroupBy().equals(x.getGroupBy())) {
            x = SearchIndexResultRecord.of(arraySize);
            x.setGroupBy(y.getGroupBy());
            for (int i = 0; i < dimSize; i++) {
              x.setField(i, y.getField(i));
            }
          }
          try {
            for (int i = 0; i < queryMeasures.size(); i++) {
              QueryMeasure measure = queryMeasures.get(i);
              final int index = i + dimSize;
              if (measure.getAggregator().equals(Aggregator.DISTINCT_COUNT)) {
                if (!x.getDistinctMeasures().containsKey(i)) {
                  x.getDistinctMeasures().put(i, new HashSet<>(defaultSize));
                }

                // Merge an already-collected distinct set if the incoming record has one;
                // otherwise add its single non-null raw value.
                if (y.getDistinctMeasures().containsKey(i)) {
                  x.getDistinctMeasures().get(i).addAll(y.getDistinctMeasures().get(i));
                } else if (y.getField(index) != null) {
                  x.getDistinctMeasures().get(i).add(y.getField(index));
                }

              } else {
                x.setField(
                    index, measure.getAggregator().aggregate(x.getField(index), y.getField(index)));
              }
            }
          } catch (Exception e) {
            throw new RuntimeException(e);
          }
          return x;
        };
    final Collector<SearchIndexResultRecord, ?, SearchIndexResultRecord> reducing =
        Collectors.reducing(SearchIndexResultRecord.of(arraySize), reduceOperation);
    Map<String, SearchIndexResultRecord> groupResult =
        stream.collect(
            Collectors.groupingByConcurrent(SearchIndexResultRecord::getGroupBy, reducing));

    // Replace each distinct-count measure with the size of its collected value set.
    if (CollectionUtils.isNotEmpty(countIndex)) {
      groupResult
          .values()
          .forEach(
              record -> {
                for (int index : countIndex) {
                  if (record.getDistinctMeasures() != null
                      && record.getDistinctMeasures().containsKey(index)) {
                    record.setField(
                        dimSize + index, record.getDistinctMeasures().get(index).size());
                  }
                }
              });
    }
    LOGGER.info(
        "group agg(sum) cost: {}ms, size:{}!",
        (System.currentTimeMillis() - current),
        groupResult.size());
    return new ArrayList<>(groupResult.values());
  }
Example #3
0
  /**
   * Runs a series of stream and collection operations on a fixed list of strings and, after each
   * one, asserts that {@code TestHelper.getLastRecordedStateForVariable} returns the string form
   * of the value just assigned to the named local variable.
   *
   * <p>NOTE(review): the variable names are passed to {@code TestHelper} as string literals, so
   * renaming a local requires updating the matching literal. How the state is recorded is not
   * visible here — presumably by instrumentation of this method; confirm before restructuring.
   */
  @Test
  public void stream_test() throws Exception {
    // Fixture: contains a duplicate ("abc") and an empty string so distinct/filter have an effect.
    List<String> strings = Arrays.asList("abc", "bc", "", "xyz", "abc");

    // Plain collect back into a list.
    List<String> recollectedStrings = strings.stream().collect(Collectors.toList());
    assertThat(
        TestHelper.getLastRecordedStateForVariable("recollectedStrings"),
        equalTo(recollectedStrings.toString()));

    // Group by string length using the concurrent grouping collector.
    Map<Integer, List<String>> groupedStrings =
        strings.stream().collect(Collectors.groupingByConcurrent(String::length));
    assertThat(
        TestHelper.getLastRecordedStateForVariable("groupedStrings"),
        equalTo(groupedStrings.toString()));

    // Drop empty strings.
    List<String> filteredStrings =
        strings.stream().filter(string -> !string.isEmpty()).collect(Collectors.toList());
    assertThat(
        TestHelper.getLastRecordedStateForVariable("filteredStrings"),
        equalTo(filteredStrings.toString()));

    // List populated through a forEach side effect rather than a collector.
    List<String> otherStrings = new ArrayList<>();
    strings.forEach(s -> otherStrings.add(s));
    assertThat(
        TestHelper.getLastRecordedStateForVariable("otherStrings"),
        equalTo(otherStrings.toString()));

    // Map each string to itself concatenated twice (sequential stream).
    List<String> doubleStrings = strings.stream().map(s -> s + s).collect(Collectors.toList());
    assertThat(
        TestHelper.getLastRecordedStateForVariable("doubleStrings"),
        equalTo(doubleStrings.toString()));

    // Same mapping on a parallel stream.
    List<String> parallellyComputedDoubleStrings =
        strings.parallelStream().map(s -> s + s).collect(Collectors.toList());
    assertThat(
        TestHelper.getLastRecordedStateForVariable("parallellyComputedDoubleStrings"),
        equalTo(parallellyComputedDoubleStrings.toString()));

    // Remove duplicates.
    List<String> distinctStrings = strings.stream().distinct().collect(Collectors.toList());
    assertThat(
        TestHelper.getLastRecordedStateForVariable("distinctStrings"),
        equalTo(distinctStrings.toString()));

    // Keep only the first three elements.
    List<String> limitedStrings = strings.stream().limit(3).collect(Collectors.toList());
    assertThat(
        TestHelper.getLastRecordedStateForVariable("limitedStrings"),
        equalTo(limitedStrings.toString()));

    // Natural-order sort.
    List<String> sortedStrings = strings.stream().sorted().collect(Collectors.toList());
    assertThat(
        TestHelper.getLastRecordedStateForVariable("sortedStrings"),
        equalTo(sortedStrings.toString()));

    // Join the non-empty strings with ", ".
    String mergedStrings =
        strings.stream().filter(string -> !string.isEmpty()).collect(Collectors.joining(", "));
    assertThat(
        TestHelper.getLastRecordedStateForVariable("mergedStrings"),
        equalTo(mergedStrings.toString()));

    // Primitive boolean result; compared via Boolean.toString since it has no toString() method.
    boolean hasX = strings.stream().anyMatch(s -> s.contains("x"));
    assertThat(TestHelper.getLastRecordedStateForVariable("hasX"), equalTo(Boolean.toString(hasX)));
  }