/**
 * Returns the most crowded point according to SPEA2's truncation strategy. The most crowded
 * point is the point with the smallest distance to its nearest neighbor. Ties are broken by
 * looking at the next nearest neighbor repeatedly until a difference is found.
 *
 * @return the index of the most crowded point, or {@code -1} if the distance matrix is empty
 */
public int findMostCrowdedPoint() {
  double minimumDistance = Double.POSITIVE_INFINITY;
  int minimumIndex = -1;

  for (int i = 0; i < distanceMatrix.size(); i++) {
    List<Pair<Integer, Double>> distances = distanceMatrix.get(i);

    // distances is sorted ascending, so entry 0 is the nearest neighbor
    Pair<Integer, Double> point = distances.get(0);

    // Accept the first candidate unconditionally.  Without the minimumIndex < 0 guard, a
    // first row whose nearest-neighbor distance equals the POSITIVE_INFINITY sentinel would
    // fall into the tie-break branch and index distanceMatrix with -1.
    if (minimumIndex < 0 || point.getSecond() < minimumDistance) {
      minimumDistance = point.getSecond();
      minimumIndex = i;
    } else if (point.getSecond() == minimumDistance) {
      // Tie on the nearest neighbor: walk outward through the k-th nearest neighbors
      // until one point is strictly closer than the other.
      for (int k = 0; k < distances.size(); k++) {
        double kdist1 = distances.get(k).getSecond();
        double kdist2 = distanceMatrix.get(minimumIndex).get(k).getSecond();

        if (kdist1 < kdist2) {
          minimumIndex = i;
          break;
        } else if (kdist2 < kdist1) {
          break;
        }
      }
    }
  }

  return minimumIndex;
}
private Collection<List<TimeSample>> collectUserTimeSamples(Pair<User, User> pair) { Collection<List<TimeSample>> datasets = new ArrayList<>(); // for (User user : users) { UserData userData = this.usersData.get(pair.getFirst()); datasets.add(new ArrayList<>(userData.getBtData())); datasets.add(new ArrayList<>(userData.getActivityData())); datasets.add(new ArrayList<>(userData.getWifiData())); datasets.add(new ArrayList<>(userData.getAudioData())); userData = this.usersData.get(pair.getSecond()); datasets.add(new ArrayList<>(userData.getBtData())); datasets.add(new ArrayList<>(userData.getActivityData())); datasets.add(new ArrayList<>(userData.getWifiData())); datasets.add(new ArrayList<>(userData.getAudioData())); // } return datasets; }
/** * Removes the point with the given index. * * @param index the index to remove */ public void removePoint(int index) { distanceMatrix.remove(index); for (List<Pair<Integer, Double>> distances : distanceMatrix) { ListIterator<Pair<Integer, Double>> iterator = distances.listIterator(); while (iterator.hasNext()) { Pair<Integer, Double> point = iterator.next(); if (point.getFirst() == index) { iterator.remove(); } else if (point.getFirst() > index) { // decrement the index so it stays aligned with the // index in distanceMatrix iterator.set(new Pair<Integer, Double>(point.getFirst() - 1, point.getSecond())); } } } }
/**
 * Converts a counter into a list of (word, count^power) pairs, raising each word's count to
 * the given exponent.
 *
 * @param counts the counter holding per-word counts
 * @param power  the exponent applied to each count
 * @return one pair per word in the counter's key set
 */
private static List<Pair<String, Double>> convertCounter(Counter<String> counts, double power) {
  List<Pair<String, Double>> convertedCounts = new ArrayList<>();

  for (String word : counts.keySet()) {
    double weighted = Math.pow(counts.getCount(word), power);
    convertedCounts.add(Pair.create(word, weighted));
  }

  return convertedCounts;
}
private Pair<Integer, Set<Integer>> getWordContextPair( List<String> sentence, int wordPosition) { String centerWord = sentence.get(wordPosition); int centerWordIndex = encodedVocab.get(centerWord); Set<Integer> contextWordSet = new HashSet<Integer>(); for (int i = wordPosition - contextSize; i < wordPosition + contextSize; i++) { if (i < 0) continue; // Ignore contexts prior to start of sentence if (i >= sentence.size()) break; // Ignore contexts after end of current sentence if (i == centerWordIndex) continue; // Ignore center word String contextWord = sentence.get(i); int contextWordIndex = encodedVocab.get(contextWord); contextWordSet.add(contextWordIndex); } return Pair.create(centerWordIndex, contextWordSet); }
/**
 * Computes the Kendall's Tau rank correlation coefficient between the two arrays.
 *
 * <p>Uses the O(n log n) merge-sort based algorithm: sort by x (then y), count tied pairs,
 * then count the swaps a merge sort needs to order by y — the swap count is the number of
 * discordant pairs.
 *
 * @param xArray first data array
 * @param yArray second data array
 * @return Returns Kendall's Tau rank correlation coefficient for the two arrays
 * @throws DimensionMismatchException if the arrays lengths do not match
 */
public double correlation(final double[] xArray, final double[] yArray)
    throws DimensionMismatchException {

  if (xArray.length != yArray.length) {
    throw new DimensionMismatchException(xArray.length, yArray.length);
  }

  final int n = xArray.length;
  final long numPairs = n * (n - 1l) / 2l;

  @SuppressWarnings("unchecked")
  Pair<Double, Double>[] pairs = new Pair[n];
  for (int i = 0; i < n; i++) {
    pairs[i] = new Pair<Double, Double>(xArray[i], yArray[i]);
  }

  // Sort by x, breaking ties by y, so tied-x runs are contiguous.
  Arrays.sort(
      pairs,
      new Comparator<Pair<Double, Double>>() {
        public int compare(Pair<Double, Double> pair1, Pair<Double, Double> pair2) {
          int compareFirst = pair1.getFirst().compareTo(pair2.getFirst());
          return compareFirst != 0
              ? compareFirst
              : pair1.getSecond().compareTo(pair2.getSecond());
        }
      });

  // BUG FIX: these counters (and `swaps` below) were `int`.  For large n the pair counts
  // exceed Integer.MAX_VALUE and overflow; and the original
  // `int concordantMinusDiscordant = <long expression>` did not even compile.
  // `long` throughout matches the upstream Commons Math fix (MATH-1277).
  long tiedXPairs = 0;
  long tiedXYPairs = 0;
  long consecutiveXTies = 1;
  long consecutiveXYTies = 1;
  Pair<Double, Double> prev = pairs[0];
  for (int i = 1; i < n; i++) {
    final Pair<Double, Double> curr = pairs[i];
    if (curr.getFirst().equals(prev.getFirst())) {
      consecutiveXTies++;
      if (curr.getSecond().equals(prev.getSecond())) {
        consecutiveXYTies++;
      } else {
        tiedXYPairs += consecutiveXYTies * (consecutiveXYTies - 1) / 2;
        consecutiveXYTies = 1;
      }
    } else {
      tiedXPairs += consecutiveXTies * (consecutiveXTies - 1) / 2;
      consecutiveXTies = 1;
      tiedXYPairs += consecutiveXYTies * (consecutiveXYTies - 1) / 2;
      consecutiveXYTies = 1;
    }
    prev = curr;
  }
  tiedXPairs += consecutiveXTies * (consecutiveXTies - 1) / 2;
  tiedXYPairs += consecutiveXYTies * (consecutiveXYTies - 1) / 2;

  // Merge sort on y, counting the element moves across segments (= discordant pairs).
  long swaps = 0;
  @SuppressWarnings("unchecked")
  Pair<Double, Double>[] pairsDestination = new Pair[n];
  for (int segmentSize = 1; segmentSize < n; segmentSize <<= 1) {
    for (int offset = 0; offset < n; offset += 2 * segmentSize) {
      int i = offset;
      final int iEnd = FastMath.min(i + segmentSize, n);
      int j = iEnd;
      final int jEnd = FastMath.min(j + segmentSize, n);

      int copyLocation = offset;
      while (i < iEnd || j < jEnd) {
        if (i < iEnd) {
          if (j < jEnd) {
            if (pairs[i].getSecond().compareTo(pairs[j].getSecond()) <= 0) {
              pairsDestination[copyLocation] = pairs[i];
              i++;
            } else {
              pairsDestination[copyLocation] = pairs[j];
              j++;
              // every remaining element of the left run is out of order w.r.t. pairs[j]
              swaps += iEnd - i;
            }
          } else {
            pairsDestination[copyLocation] = pairs[i];
            i++;
          }
        } else {
          pairsDestination[copyLocation] = pairs[j];
          j++;
        }
        copyLocation++;
      }
    }
    final Pair<Double, Double>[] pairsTemp = pairs;
    pairs = pairsDestination;
    pairsDestination = pairsTemp;
  }

  // Now sorted by y: count tied-y pairs the same way as tied-x above.
  long tiedYPairs = 0;
  long consecutiveYTies = 1;
  prev = pairs[0];
  for (int i = 1; i < n; i++) {
    final Pair<Double, Double> curr = pairs[i];
    if (curr.getSecond().equals(prev.getSecond())) {
      consecutiveYTies++;
    } else {
      tiedYPairs += consecutiveYTies * (consecutiveYTies - 1) / 2;
      consecutiveYTies = 1;
    }
    prev = curr;
  }
  tiedYPairs += consecutiveYTies * (consecutiveYTies - 1) / 2;

  final long concordantMinusDiscordant =
      numPairs - tiedXPairs - tiedYPairs + tiedXYPairs - 2 * swaps;
  final long nonTiedPairsMultiplied = (numPairs - tiedXPairs) * (numPairs - tiedYPairs);
  return concordantMinusDiscordant / FastMath.sqrt(nonTiedPairsMultiplied);
}
// Ignored already selected dimensions
/**
 * For each dimension of the collection, queries the per-dimension-value metric totals over
 * [baselineMillis, currentMillis] and returns, per dimension, the values whose contribution
 * meets {@code contributionThreshold} of the metric total, at most
 * {@code dimensionValuesLimit} per metric, ordered by descending contribution per metric.
 *
 * @param collection            the collection whose dimensions are inspected
 * @param baselineMillis        start of the query window (epoch millis)
 * @param currentMillis         end of the query window (epoch millis)
 * @param contributionThreshold fraction of the metric total a value must reach to be kept
 * @param dimensionValuesLimit  maximum number of values retained per metric
 * @return map from dimension name to its ordered, de-duplicated top values
 * @throws Exception if SQL generation, the async query, or JSON parsing fails
 */
private Map<String, Collection<String>> retrieveDimensionValues(
    String collection,
    long baselineMillis,
    long currentMillis,
    double contributionThreshold,
    int dimensionValuesLimit)
    throws Exception {
  List<String> dimensions = getAllDimensions(collection);
  DateTime baseline = new DateTime(baselineMillis);
  DateTime current = new DateTime(currentMillis);
  List<String> metrics = getMetrics(collection);
  String dummyFunction =
      String.format(
          DIMENSION_VALUES_OPTIONS_METRIC_FUNCTION, METRIC_FUNCTION_JOINER.join(metrics));

  MultivaluedMap<String, String> dimensionValues = new MultivaluedMapImpl();
  Map<String, Future<QueryResult>> resultFutures = new HashMap<>();
  // query w/ group by for each dimension.
  for (String dimension : dimensions) {
    // Generate SQL
    // "!" is a placeholder value for the current dimension; it is removed again below so
    // dimensionValues only ever carries the one dimension being queried.
    dimensionValues.put(dimension, Arrays.asList("!"));
    String sql =
        SqlUtils.getSql(dummyFunction, collection, baseline, current, dimensionValues, null);
    LOGGER.info("Generated SQL for dimension retrieval {}: {}", serverUri, sql);
    dimensionValues.remove(dimension);

    // Query (in parallel)
    resultFutures.put(dimension, queryCache.getQueryResultAsync(serverUri, sql));
  }

  Map<String, Collection<String>> collectedDimensionValues = new HashMap<>();
  // Wait for all queries and generate the ordered list from the result.
  for (int i = 0; i < dimensions.size(); i++) {
    String dimension = dimensions.get(i);
    // blocks until this dimension's query completes
    QueryResult queryResult = resultFutures.get(dimension).get();

    // Sum up hourly data over entire dataset for each dimension combination
    int metricCount = metrics.size();
    double[] total = new double[metricCount];
    Map<String, double[]> summedValues = new HashMap<>();
    for (Map.Entry<String, Map<String, Number[]>> entry : queryResult.getData().entrySet()) {
      double[] sum = new double[metricCount];
      for (Map.Entry<String, Number[]> hourlyEntry : entry.getValue().entrySet()) {
        for (int j = 0; j < metricCount; j++) {
          double value = hourlyEntry.getValue()[j].doubleValue();
          sum[j] += value;
        }
      }
      summedValues.put(entry.getKey(), sum);
      // update total w/ sums for each dimension value.
      for (int j = 0; j < metricCount; j++) {
        total[j] += sum[j];
      }
    }

    // compare by value ascending (want poll to remove smallest element)
    List<PriorityQueue<Pair<String, Double>>> topNValuesByMetric =
        new ArrayList<PriorityQueue<Pair<String, Double>>>(metricCount);
    double[] threshold = new double[metricCount];
    Comparator<Pair<String, Double>> valueComparator =
        new Comparator<Pair<String, Double>>() {
          @Override
          public int compare(Pair<String, Double> a, Pair<String, Double> b) {
            return Double.compare(a.getValue().doubleValue(), b.getValue().doubleValue());
          }
        };
    for (int j = 0; j < metricCount; j++) {
      threshold[j] = total[j] * contributionThreshold;
      topNValuesByMetric.add(new PriorityQueue<>(dimensionValuesLimit, valueComparator));
    }

    // For each dimension value, add it only if it meets the threshold and drop an element from
    // the priority queue if over the limit.
    for (Map.Entry<String, double[]> entry : summedValues.entrySet()) {
      // the result key is a JSON-encoded list of dimension values; position i corresponds
      // to the dimension currently being processed
      List<String> combination = objectMapper.readValue(entry.getKey(), LIST_TYPE_REF);
      String dimensionValue = combination.get(i);
      for (int j = 0; j < metricCount; j++) { // metricCount == entry.getValue().length
        double dimensionValueContribution = entry.getValue()[j];
        if (dimensionValueContribution >= threshold[j]) {
          PriorityQueue<Pair<String, Double>> topNValues = topNValuesByMetric.get(j);
          topNValues.add(new Pair<>(dimensionValue, dimensionValueContribution));
          if (topNValues.size() > dimensionValuesLimit) {
            // queue is a min-heap on contribution, so this evicts the smallest
            topNValues.poll();
          }
        }
      }
    }

    // Poll returns the elements in order of ascending contribution, so poll and reverse the
    // order.
    // not LinkedHashSet because we need to reverse insertion order with metrics.
    List<String> sortedValues = new ArrayList<>();
    HashSet<String> sortedValuesSet = new HashSet<>();
    for (int j = 0; j < metricCount; j++) {
      PriorityQueue<Pair<String, Double>> topNValues = topNValuesByMetric.get(j);
      int startIndex = sortedValues.size();
      while (!topNValues.isEmpty()) {
        Pair<String, Double> pair = topNValues.poll();
        String dimensionValue = pair.getKey();
        if (!sortedValuesSet.contains(dimensionValue)) {
          // inserting at the segment start reverses the ascending poll order, yielding
          // descending contribution within this metric's segment
          sortedValues.add(startIndex, dimensionValue);
          sortedValuesSet.add(dimensionValue);
        }
      }
    }
    collectedDimensionValues.put(dimension, sortedValues);
  }
  return collectedDimensionValues;
}
/**
 * Loads the feature data for both members of the pair over the configured interval and
 * caches it in {@code usersData}, keyed by user.
 *
 * @param pair the pair of users whose feature data is loaded
 */
private void loadFeaturesData(Pair<User, User> pair) {
  User first = pair.getFirst();
  User second = pair.getSecond();

  usersData.put(first, new UserFeaturesDataLoader(first, interval).get());
  usersData.put(second, new UserFeaturesDataLoader(second, interval).get());
}
/**
 * Builds per-field generation models from a seed dump.  Each non-blank line of {@code seed}
 * is "fieldName\t" followed by "; "-separated entries, where each entry is "value\tweight".
 * A model of the appropriate type (simple, or object for SINGLE_OBJECT_VALUE fields) is
 * created per field and stored in {@code fieldModels}.
 *
 * @param seed the raw seed text, one field definition per line
 */
public MedlineGenerator(String seed) {
  String[] fields = seed.split("\\r?\\n");
  for (String fieldData : fields) {
    // skip blank/degenerate lines that cannot hold a field name plus data
    if (fieldData.length() < 4) {
      continue;
    }
    // NOTE(review): this Scanner is never closed; harmless for a String source, but
    // consider try-with-resources.
    final Scanner scanner = new Scanner(fieldData);
    // first token (up to the tab) is the field name
    scanner.useDelimiter("\\t");
    String fieldName = scanner.next();
    // remaining entries are separated by "; "; findInLine consumes the name/data tab
    scanner.useDelimiter("; ");
    scanner.findInLine("\t");
    MedlineFieldDefinition defn = MedlineFieldDefinitions.getDefinition(fieldName);
    // unknown fields default to a single free-text value
    MedlineFieldDefinition.FieldType fieldType =
        defn != null ? defn.type : MedlineFieldDefinition.FieldType.SINGLE_TEXT_VALUE;
    BaseFieldModel fieldModel = null;

    // Lazily yields (weight, value) pairs from the scanner.  An entry may itself contain
    // "; " — then the trailing "value\tweight" split yields only one part and the loop
    // keeps appending the next token until the weight is found.
    Iterable<Pair<Long, String>> scannerIterator =
        new Iterable<Pair<Long, String>>() {
          @Override
          public Iterator<Pair<Long, String>> iterator() {
            return new Iterator<Pair<Long, String>>() {
              @Override
              public boolean hasNext() {
                return scanner.hasNext();
              }

              @Override
              public Pair<Long, String> next() {
                String value = "";
                String[] data;
                do {
                  String next = scanner.next();
                  data = next.split("\\t");
                  if (data.length > 2) {
                    throw new IllegalStateException(
                        String.format("Cannot parse word: '%s'", value + next));
                  }
                  value += data[0];
                } while (data.length < 2);
                return new Pair<>(Long.parseLong(data[1]), value);
              }

              @Override
              public void remove() {
                throw new NotImplementedException();
              }
            };
          }
        };

    switch (fieldType) {
      case ARRAY_TEXT_VALUES:
      case SINGLE_TEXT_VALUE:
      case WORDS:
        // plain weighted values
        SimpleFieldModel model = new SimpleFieldModel(fieldName, fieldType);
        for (Pair<Long, String> pair : scannerIterator) {
          model.addValue(pair.getKey(), pair.getValue());
        }
        fieldModel = model;
        break;
      case SINGLE_OBJECT_VALUE:
        // values of the form "propertyName:propertyValue"
        ObjectFieldModel objectModel = new ObjectFieldModel(fieldName);
        for (Pair<Long, String> pair : scannerIterator) {
          long weight = pair.getKey();
          String propertyData = pair.getValue();
          int firstColonIndex = propertyData.indexOf(':');
          String propertyName = propertyData.substring(0, firstColonIndex);
          // NOTE(review): substring(firstColonIndex) keeps the leading ':' in the value
          // (not firstColonIndex + 1) — confirm downstream consumers expect the colon.
          String propertyValue = propertyData.substring(firstColonIndex);
          objectModel.addValue(propertyName, weight, propertyValue);
        }
        fieldModel = objectModel;
        break;
    }
    fieldModels.put(fieldName, fieldModel);
  }
}