// Ignored already selected dimensions private Map<String, Collection<String>> retrieveDimensionValues( String collection, long baselineMillis, long currentMillis, double contributionThreshold, int dimensionValuesLimit) throws Exception { List<String> dimensions = getAllDimensions(collection); DateTime baseline = new DateTime(baselineMillis); DateTime current = new DateTime(currentMillis); List<String> metrics = getMetrics(collection); String dummyFunction = String.format( DIMENSION_VALUES_OPTIONS_METRIC_FUNCTION, METRIC_FUNCTION_JOINER.join(metrics)); MultivaluedMap<String, String> dimensionValues = new MultivaluedMapImpl(); Map<String, Future<QueryResult>> resultFutures = new HashMap<>(); // query w/ group by for each dimension. for (String dimension : dimensions) { // Generate SQL dimensionValues.put(dimension, Arrays.asList("!")); String sql = SqlUtils.getSql(dummyFunction, collection, baseline, current, dimensionValues, null); LOGGER.info("Generated SQL for dimension retrieval {}: {}", serverUri, sql); dimensionValues.remove(dimension); // Query (in parallel) resultFutures.put(dimension, queryCache.getQueryResultAsync(serverUri, sql)); } Map<String, Collection<String>> collectedDimensionValues = new HashMap<>(); // Wait for all queries and generate the ordered list from the result. for (int i = 0; i < dimensions.size(); i++) { String dimension = dimensions.get(i); QueryResult queryResult = resultFutures.get(dimension).get(); // Sum up hourly data over entire dataset for each dimension combination int metricCount = metrics.size(); double[] total = new double[metricCount]; Map<String, double[]> summedValues = new HashMap<>(); for (Map.Entry<String, Map<String, Number[]>> entry : queryResult.getData().entrySet()) { double[] sum = new double[metricCount]; for (Map.Entry<String, Number[]> hourlyEntry : entry.getValue().entrySet()) { for (int j = 0; j < metricCount; j++) { double value = hourlyEntry.getValue()[j].doubleValue(); sum[j] += value; } } summedValues.put(entry.getKey(), sum); // update total w/ sums for each dimension value. for (int j = 0; j < metricCount; j++) { total[j] += sum[j]; } } // compare by value ascending (want poll to remove smallest element) List<PriorityQueue<Pair<String, Double>>> topNValuesByMetric = new ArrayList<PriorityQueue<Pair<String, Double>>>(metricCount); double[] threshold = new double[metricCount]; Comparator<Pair<String, Double>> valueComparator = new Comparator<Pair<String, Double>>() { @Override public int compare(Pair<String, Double> a, Pair<String, Double> b) { return Double.compare(a.getValue().doubleValue(), b.getValue().doubleValue()); } }; for (int j = 0; j < metricCount; j++) { threshold[j] = total[j] * contributionThreshold; topNValuesByMetric.add(new PriorityQueue<>(dimensionValuesLimit, valueComparator)); } // For each dimension value, add it only if it meets the threshold and drop an element from // the priority queue if over the limit. for (Map.Entry<String, double[]> entry : summedValues.entrySet()) { List<String> combination = objectMapper.readValue(entry.getKey(), LIST_TYPE_REF); String dimensionValue = combination.get(i); for (int j = 0; j < metricCount; j++) { // metricCount == entry.getValue().length double dimensionValueContribution = entry.getValue()[j]; if (dimensionValueContribution >= threshold[j]) { PriorityQueue<Pair<String, Double>> topNValues = topNValuesByMetric.get(j); topNValues.add(new Pair<>(dimensionValue, dimensionValueContribution)); if (topNValues.size() > dimensionValuesLimit) { topNValues.poll(); } } } } // Poll returns the elements in order of ascending contribution, so poll and reverse the // order. // not LinkedHashSet because we need to reverse insertion order with metrics. List<String> sortedValues = new ArrayList<>(); HashSet<String> sortedValuesSet = new HashSet<>(); for (int j = 0; j < metricCount; j++) { PriorityQueue<Pair<String, Double>> topNValues = topNValuesByMetric.get(j); int startIndex = sortedValues.size(); while (!topNValues.isEmpty()) { Pair<String, Double> pair = topNValues.poll(); String dimensionValue = pair.getKey(); if (!sortedValuesSet.contains(dimensionValue)) { sortedValues.add(startIndex, dimensionValue); sortedValuesSet.add(dimensionValue); } } } collectedDimensionValues.put(dimension, sortedValues); } return collectedDimensionValues; }
public MedlineGenerator(String seed) { String[] fields = seed.split("\\r?\\n"); for (String fieldData : fields) { if (fieldData.length() < 4) { continue; } final Scanner scanner = new Scanner(fieldData); scanner.useDelimiter("\\t"); String fieldName = scanner.next(); scanner.useDelimiter("; "); scanner.findInLine("\t"); MedlineFieldDefinition defn = MedlineFieldDefinitions.getDefinition(fieldName); MedlineFieldDefinition.FieldType fieldType = defn != null ? defn.type : MedlineFieldDefinition.FieldType.SINGLE_TEXT_VALUE; BaseFieldModel fieldModel = null; Iterable<Pair<Long, String>> scannerIterator = new Iterable<Pair<Long, String>>() { @Override public Iterator<Pair<Long, String>> iterator() { return new Iterator<Pair<Long, String>>() { @Override public boolean hasNext() { return scanner.hasNext(); } @Override public Pair<Long, String> next() { String value = ""; String[] data; do { String next = scanner.next(); data = next.split("\\t"); if (data.length > 2) { throw new IllegalStateException( String.format("Cannot parse word: '%s'", value + next)); } value += data[0]; } while (data.length < 2); return new Pair<>(Long.parseLong(data[1]), value); } @Override public void remove() { throw new NotImplementedException(); } }; } }; switch (fieldType) { case ARRAY_TEXT_VALUES: case SINGLE_TEXT_VALUE: case WORDS: SimpleFieldModel model = new SimpleFieldModel(fieldName, fieldType); for (Pair<Long, String> pair : scannerIterator) { model.addValue(pair.getKey(), pair.getValue()); } fieldModel = model; break; case SINGLE_OBJECT_VALUE: ObjectFieldModel objectModel = new ObjectFieldModel(fieldName); for (Pair<Long, String> pair : scannerIterator) { long weight = pair.getKey(); String propertyData = pair.getValue(); int firstColonIndex = propertyData.indexOf(':'); String propertyName = propertyData.substring(0, firstColonIndex); String propertyValue = propertyData.substring(firstColonIndex); objectModel.addValue(propertyName, weight, propertyValue); } fieldModel = objectModel; break; } fieldModels.put(fieldName, fieldModel); } }