コード例 #1
0
  /**
   * Collects, for every dimension of a collection, the dimension values that contribute most to
   * the collection's metrics between the baseline and current times.
   *
   * <p>One query is issued per dimension through {@code queryCache.getQueryResultAsync}; all
   * queries are submitted before any result is awaited. For each result, every metric is summed
   * over all time buckets of each returned dimension combination. A dimension value is kept for a
   * metric when its sum is at least {@code contributionThreshold} times the metric's total, and
   * at most {@code dimensionValuesLimit} values are kept per metric (the largest ones).
   *
   * <p>The values of a dimension are ordered metric by metric, in the order returned by
   * {@code getMetrics}; within one metric they are ordered by descending contribution. A value
   * already listed for an earlier metric is not repeated.
   *
   * <p>NOTE(review): the comment previously attached to this method read "Ignored already
   * selected dimensions", but every dimension returned by {@code getAllDimensions} is queried
   * here — confirm whether any filtering is expected.
   *
   * @param collection name of the collection to query
   * @param baselineMillis baseline time in epoch milliseconds
   * @param currentMillis current time in epoch milliseconds
   * @param contributionThreshold fraction of a metric's total that a dimension value must reach
   *     to be kept
   * @param dimensionValuesLimit maximum number of values kept per metric; a value below 1 keeps
   *     nothing
   * @return map from dimension name to its ordered dimension values
   * @throws Exception if a query fails, waiting for a result fails, or a result key cannot be
   *     parsed
   */
  private Map<String, Collection<String>> retrieveDimensionValues(
      String collection,
      long baselineMillis,
      long currentMillis,
      double contributionThreshold,
      int dimensionValuesLimit)
      throws Exception {
    List<String> dimensions = getAllDimensions(collection);
    DateTime baseline = new DateTime(baselineMillis);
    DateTime current = new DateTime(currentMillis);

    List<String> metrics = getMetrics(collection);
    int metricCount = metrics.size();
    String dummyFunction =
        String.format(
            DIMENSION_VALUES_OPTIONS_METRIC_FUNCTION, METRIC_FUNCTION_JOINER.join(metrics));

    MultivaluedMap<String, String> dimensionValues = new MultivaluedMapImpl();
    Map<String, Future<QueryResult>> resultFutures = new HashMap<>();
    // query w/ group by for each dimension.
    for (String dimension : dimensions) {
      // Generate SQL. The map holds only the current dimension (mapped to "!") while the SQL is
      // built; presumably "!" is the group-by marker understood by SqlUtils — TODO confirm.
      dimensionValues.put(dimension, Arrays.asList("!"));
      String sql =
          SqlUtils.getSql(dummyFunction, collection, baseline, current, dimensionValues, null);
      LOGGER.info("Generated SQL for dimension retrieval {}: {}", serverUri, sql);
      dimensionValues.remove(dimension);

      // Query (in parallel)
      resultFutures.put(dimension, queryCache.getQueryResultAsync(serverUri, sql));
    }

    // compare by value ascending (want poll to remove smallest element)
    Comparator<Pair<String, Double>> valueComparator =
        new Comparator<Pair<String, Double>>() {
          @Override
          public int compare(Pair<String, Double> a, Pair<String, Double> b) {
            return Double.compare(a.getValue().doubleValue(), b.getValue().doubleValue());
          }
        };
    // PriorityQueue rejects an initial capacity below 1. Clamp the capacity only: the size check
    // against dimensionValuesLimit below still empties the queue when the limit is 0 or negative.
    int queueCapacity = Math.max(1, dimensionValuesLimit);

    Map<String, Collection<String>> collectedDimensionValues = new HashMap<>();
    // Wait for all queries and generate the ordered list from the result.
    for (int i = 0; i < dimensions.size(); i++) {
      String dimension = dimensions.get(i);
      QueryResult queryResult = resultFutures.get(dimension).get();

      // Sum up the per-time-bucket data over the entire dataset for each dimension combination.
      double[] total = new double[metricCount];
      Map<String, double[]> summedValues = new HashMap<>();

      for (Map.Entry<String, Map<String, Number[]>> entry : queryResult.getData().entrySet()) {
        double[] sum = new double[metricCount];
        for (Map.Entry<String, Number[]> hourlyEntry : entry.getValue().entrySet()) {
          for (int j = 0; j < metricCount; j++) {
            double value = hourlyEntry.getValue()[j].doubleValue();
            sum[j] += value;
          }
        }
        summedValues.put(entry.getKey(), sum);
        // update total w/ sums for each dimension value.
        for (int j = 0; j < metricCount; j++) {
          total[j] += sum[j];
        }
      }

      // One bounded min-heap and one absolute threshold per metric.
      List<PriorityQueue<Pair<String, Double>>> topNValuesByMetric = new ArrayList<>(metricCount);
      double[] threshold = new double[metricCount];
      for (int j = 0; j < metricCount; j++) {
        threshold[j] = total[j] * contributionThreshold;
        topNValuesByMetric.add(
            new PriorityQueue<Pair<String, Double>>(queueCapacity, valueComparator));
      }

      // For each dimension value, add it only if it meets the threshold and drop an element from
      // the priority queue if over the limit.
      for (Map.Entry<String, double[]> entry : summedValues.entrySet()) {
        // The result key is a JSON list; the element at this dimension's index is read as its
        // value (assumes the key lists one entry per dimension, in getAllDimensions order).
        List<String> combination = objectMapper.readValue(entry.getKey(), LIST_TYPE_REF);
        String dimensionValue = combination.get(i);
        for (int j = 0; j < metricCount; j++) { // metricCount == entry.getValue().length
          double dimensionValueContribution = entry.getValue()[j];
          if (dimensionValueContribution >= threshold[j]) {
            PriorityQueue<Pair<String, Double>> topNValues = topNValuesByMetric.get(j);
            topNValues.add(new Pair<>(dimensionValue, dimensionValueContribution));
            if (topNValues.size() > dimensionValuesLimit) {
              topNValues.poll();
            }
          }
        }
      }

      // Poll returns the elements in order of ascending contribution, so poll and reverse the
      // order.

      // not LinkedHashSet because we need to reverse insertion order with metrics.
      List<String> sortedValues = new ArrayList<>();
      HashSet<String> sortedValuesSet = new HashSet<>();

      for (int j = 0; j < metricCount; j++) {
        PriorityQueue<Pair<String, Double>> topNValues = topNValuesByMetric.get(j);
        // Inserting every polled value at the same index reverses the ascending poll order, so
        // this metric's segment ends up in descending order of contribution.
        int startIndex = sortedValues.size();
        while (!topNValues.isEmpty()) {
          Pair<String, Double> pair = topNValues.poll();
          String dimensionValue = pair.getKey();
          if (!sortedValuesSet.contains(dimensionValue)) {
            sortedValues.add(startIndex, dimensionValue);
            sortedValuesSet.add(dimensionValue);
          }
        }
      }

      collectedDimensionValues.put(dimension, sortedValues);
    }
    return collectedDimensionValues;
  }
コード例 #2
0
  public MedlineGenerator(String seed) {
    String[] fields = seed.split("\\r?\\n");

    for (String fieldData : fields) {
      if (fieldData.length() < 4) {
        continue;
      }
      final Scanner scanner = new Scanner(fieldData);
      scanner.useDelimiter("\\t");
      String fieldName = scanner.next();
      scanner.useDelimiter("; ");
      scanner.findInLine("\t");

      MedlineFieldDefinition defn = MedlineFieldDefinitions.getDefinition(fieldName);

      MedlineFieldDefinition.FieldType fieldType =
          defn != null ? defn.type : MedlineFieldDefinition.FieldType.SINGLE_TEXT_VALUE;

      BaseFieldModel fieldModel = null;

      Iterable<Pair<Long, String>> scannerIterator =
          new Iterable<Pair<Long, String>>() {
            @Override
            public Iterator<Pair<Long, String>> iterator() {
              return new Iterator<Pair<Long, String>>() {
                @Override
                public boolean hasNext() {
                  return scanner.hasNext();
                }

                @Override
                public Pair<Long, String> next() {
                  String value = "";
                  String[] data;
                  do {
                    String next = scanner.next();
                    data = next.split("\\t");
                    if (data.length > 2) {
                      throw new IllegalStateException(
                          String.format("Cannot parse word: '%s'", value + next));
                    }
                    value += data[0];
                  } while (data.length < 2);
                  return new Pair<>(Long.parseLong(data[1]), value);
                }

                @Override
                public void remove() {
                  throw new NotImplementedException();
                }
              };
            }
          };

      switch (fieldType) {
        case ARRAY_TEXT_VALUES:
        case SINGLE_TEXT_VALUE:
        case WORDS:
          SimpleFieldModel model = new SimpleFieldModel(fieldName, fieldType);
          for (Pair<Long, String> pair : scannerIterator) {
            model.addValue(pair.getKey(), pair.getValue());
          }
          fieldModel = model;
          break;
        case SINGLE_OBJECT_VALUE:
          ObjectFieldModel objectModel = new ObjectFieldModel(fieldName);
          for (Pair<Long, String> pair : scannerIterator) {
            long weight = pair.getKey();
            String propertyData = pair.getValue();
            int firstColonIndex = propertyData.indexOf(':');
            String propertyName = propertyData.substring(0, firstColonIndex);
            String propertyValue = propertyData.substring(firstColonIndex);
            objectModel.addValue(propertyName, weight, propertyValue);
          }
          fieldModel = objectModel;
          break;
      }

      fieldModels.put(fieldName, fieldModel);
    }
  }