Esempio n. 1
0
    /**
     * Returns the most crowded point according to SPEA2's truncation strategy. The most crowded
     * point is the point with the smallest distance to its nearest neighbor. Ties are broken by
     * looking at the next nearest neighbor repeatedly until a difference is found.
     *
     * <p>Only the first entry of each row of {@code distanceMatrix} is used as the "nearest"
     * distance, and ties are resolved by walking both rows position by position; this presumes
     * every row is ordered by increasing distance and that rows have equal length (TODO confirm
     * against the code that builds the matrix).
     *
     * @return the index of the most crowded point, or {@code -1} if the matrix has no rows
     */
    public int findMostCrowdedPoint() {
      double minimumDistance = Double.POSITIVE_INFINITY;
      int minimumIndex = -1;

      for (int i = 0; i < distanceMatrix.size(); i++) {
        List<Pair<Integer, Double>> distances = distanceMatrix.get(i);
        double nearest = distances.get(0).getSecond();

        if (nearest < minimumDistance) {
          minimumDistance = nearest;
          minimumIndex = i;
        } else if (nearest == minimumDistance) {
          if (minimumIndex < 0) {
            // The nearest distance equals the initial +Infinity sentinel and no candidate has
            // been chosen yet. There is no incumbent row to compare against (indexing row -1
            // would throw), so this point simply becomes the first candidate.
            minimumIndex = i;
            continue;
          }

          List<Pair<Integer, Double>> incumbent = distanceMatrix.get(minimumIndex);

          // Position 0 is already known to be equal, so start at the second-nearest neighbor.
          for (int k = 1; k < distances.size(); k++) {
            double kdist1 = distances.get(k).getSecond();
            double kdist2 = incumbent.get(k).getSecond();

            if (kdist1 < kdist2) {
              minimumIndex = i;
              break;
            } else if (kdist2 < kdist1) {
              break;
            }
          }
        }
      }

      return minimumIndex;
    }
  /**
   * Gathers the time-sample lists of both users of a pair into a single collection.
   *
   * <p>For the first user and then the second, copies of the lists returned by
   * {@code getBtData()}, {@code getActivityData()}, {@code getWifiData()} and
   * {@code getAudioData()} are appended in that order, giving eight lists in total. Each list
   * is a fresh {@code ArrayList}, so changing it does not touch the list held by the user data.
   *
   * @param pair the two users whose data is looked up in {@code usersData}
   * @return the copied sample lists, first user's before second user's
   */
  private Collection<List<TimeSample>> collectUserTimeSamples(Pair<User, User> pair) {
    Collection<List<TimeSample>> datasets = new ArrayList<>();
    User[] members = {pair.getFirst(), pair.getSecond()};

    for (User member : members) {
      UserData memberData = this.usersData.get(member);
      datasets.add(new ArrayList<>(memberData.getBtData()));
      datasets.add(new ArrayList<>(memberData.getActivityData()));
      datasets.add(new ArrayList<>(memberData.getWifiData()));
      datasets.add(new ArrayList<>(memberData.getAudioData()));
    }

    return datasets;
  }
Esempio n. 3
0
    /**
     * Deletes a point from the distance matrix.
     *
     * <p>The row at {@code index} is dropped first. Then, in every remaining row, entries that
     * refer to the removed point are deleted and entries that refer to a higher-numbered point
     * are replaced by a copy whose index is one lower, so stored indices keep matching the row
     * positions in {@code distanceMatrix}.
     *
     * @param index the index to remove
     */
    public void removePoint(int index) {
      distanceMatrix.remove(index);

      for (List<Pair<Integer, Double>> row : distanceMatrix) {
        ListIterator<Pair<Integer, Double>> cursor = row.listIterator();

        while (cursor.hasNext()) {
          Pair<Integer, Double> neighbor = cursor.next();
          int neighborIndex = neighbor.getFirst();

          if (neighborIndex == index) {
            // this entry pointed at the point that no longer exists
            cursor.remove();
            continue;
          }

          if (neighborIndex > index) {
            // shift down by one to follow the rows that moved up
            cursor.set(new Pair<Integer, Double>(neighborIndex - 1, neighbor.getSecond()));
          }
        }
      }
    }
Esempio n. 4
0
    /**
     * Turns a counter into a list of (word, count raised to {@code power}) pairs.
     *
     * <p>One pair is produced per key of {@code counts}, in the iteration order of its key set.
     *
     * @param counts the word counts to convert
     * @param power the exponent applied to each count via {@link Math#pow(double, double)}
     * @return a new list holding one pair per counted word
     */
    private static List<Pair<String, Double>> convertCounter(Counter<String> counts, double power) {
      List<Pair<String, Double>> powered = new ArrayList<Pair<String, Double>>();
      for (String key : counts.keySet()) {
        double scaled = Math.pow(counts.getCount(key), power);
        powered.add(Pair.create(key, scaled));
      }
      return powered;
    }
Esempio n. 5
0
    /**
     * Builds the (center word, context words) pair for one position of a sentence.
     *
     * <p>The center word is the word at {@code wordPosition}; the context is every other
     * position within {@code contextSize} words on either side, clipped to the sentence bounds.
     * Words are reported as their {@code encodedVocab} indices. The center position itself is
     * skipped, but the same word occurring elsewhere in the window still contributes its index.
     *
     * @param sentence the tokenized sentence
     * @param wordPosition position of the center word within {@code sentence}
     * @return the center word's vocabulary index paired with the set of context vocabulary
     *     indices
     */
    private Pair<Integer, Set<Integer>> getWordContextPair(
        List<String> sentence, int wordPosition) {

      String centerWord = sentence.get(wordPosition);
      int centerWordIndex = encodedVocab.get(centerWord);
      Set<Integer> contextWordSet = new HashSet<Integer>();

      // Clip the window to the sentence. The upper bound is inclusive so the window covers
      // contextSize words on the right as well as on the left.
      int windowStart = Math.max(0, wordPosition - contextSize);
      int windowEnd = Math.min(sentence.size() - 1, wordPosition + contextSize);

      for (int i = windowStart; i <= windowEnd; i++) {
        // Skip the center word by its sentence position; i is a position, not a vocabulary
        // index, so it must not be compared with centerWordIndex.
        if (i == wordPosition) continue;

        String contextWord = sentence.get(i);
        int contextWordIndex = encodedVocab.get(contextWord);
        contextWordSet.add(contextWordIndex);
      }
      return Pair.create(centerWordIndex, contextWordSet);
    }
Esempio n. 6
0
  /**
   * Computes the Kendall's Tau rank correlation coefficient between the two arrays.
   *
   * <p>The observations are sorted by x (then y) to count pairs tied in x and in both x and y,
   * then merge-sorted by y while counting the exchanges that sort implies, and finally scanned
   * once more to count pairs tied in y. All pair counts are kept in {@code long} and the
   * denominator is formed in {@code double}, because these quantities grow with n squared and
   * exceed the {@code int} (and, for the product, the {@code long}) range on large inputs.
   *
   * @param xArray first data array
   * @param yArray second data array
   * @return Returns Kendall's Tau rank correlation coefficient for the two arrays
   * @throws DimensionMismatchException if the arrays lengths do not match
   */
  public double correlation(final double[] xArray, final double[] yArray)
      throws DimensionMismatchException {

    if (xArray.length != yArray.length) {
      throw new DimensionMismatchException(xArray.length, yArray.length);
    }

    final int n = xArray.length;
    final long numPairs = n * (n - 1L) / 2L;

    @SuppressWarnings("unchecked")
    Pair<Double, Double>[] pairs = new Pair[n];
    for (int i = 0; i < n; i++) {
      pairs[i] = new Pair<Double, Double>(xArray[i], yArray[i]);
    }

    // Order by x, breaking ties by y, so equal x (and equal x,y) values become adjacent.
    Arrays.sort(
        pairs,
        new Comparator<Pair<Double, Double>>() {
          @Override
          public int compare(Pair<Double, Double> pair1, Pair<Double, Double> pair2) {
            int compareFirst = pair1.getFirst().compareTo(pair2.getFirst());
            return compareFirst != 0
                ? compareFirst
                : pair1.getSecond().compareTo(pair2.getSecond());
          }
        });

    // Count pairs tied in x and pairs tied in both x and y; a run of k equal values
    // contributes k * (k - 1) / 2 tied pairs.
    long tiedXPairs = 0;
    long tiedXYPairs = 0;
    long consecutiveXTies = 1;
    long consecutiveXYTies = 1;
    Pair<Double, Double> prev = pairs[0];
    for (int i = 1; i < n; i++) {
      final Pair<Double, Double> curr = pairs[i];
      if (curr.getFirst().equals(prev.getFirst())) {
        consecutiveXTies++;
        if (curr.getSecond().equals(prev.getSecond())) {
          consecutiveXYTies++;
        } else {
          tiedXYPairs += consecutiveXYTies * (consecutiveXYTies - 1) / 2;
          consecutiveXYTies = 1;
        }
      } else {
        tiedXPairs += consecutiveXTies * (consecutiveXTies - 1) / 2;
        consecutiveXTies = 1;
        tiedXYPairs += consecutiveXYTies * (consecutiveXYTies - 1) / 2;
        consecutiveXYTies = 1;
      }
      prev = curr;
    }
    tiedXPairs += consecutiveXTies * (consecutiveXTies - 1) / 2;
    tiedXYPairs += consecutiveXYTies * (consecutiveXYTies - 1) / 2;

    // Bottom-up merge sort by y; each time an element of the right segment is emitted ahead
    // of the remaining left segment, it jumps over (iEnd - i) elements.
    long swaps = 0;
    @SuppressWarnings("unchecked")
    Pair<Double, Double>[] pairsDestination = new Pair[n];
    for (int segmentSize = 1; segmentSize < n; segmentSize <<= 1) {
      for (int offset = 0; offset < n; offset += 2 * segmentSize) {
        int i = offset;
        final int iEnd = FastMath.min(i + segmentSize, n);
        int j = iEnd;
        final int jEnd = FastMath.min(j + segmentSize, n);

        int copyLocation = offset;
        while (i < iEnd || j < jEnd) {
          if (i < iEnd) {
            if (j < jEnd) {
              if (pairs[i].getSecond().compareTo(pairs[j].getSecond()) <= 0) {
                pairsDestination[copyLocation] = pairs[i];
                i++;
              } else {
                pairsDestination[copyLocation] = pairs[j];
                j++;
                swaps += iEnd - i;
              }
            } else {
              pairsDestination[copyLocation] = pairs[i];
              i++;
            }
          } else {
            pairsDestination[copyLocation] = pairs[j];
            j++;
          }
          copyLocation++;
        }
      }
      // The merged output becomes the input of the next, wider pass.
      final Pair<Double, Double>[] pairsTemp = pairs;
      pairs = pairsDestination;
      pairsDestination = pairsTemp;
    }

    // The array is now ordered by y, so pairs tied in y are adjacent.
    long tiedYPairs = 0;
    long consecutiveYTies = 1;
    prev = pairs[0];
    for (int i = 1; i < n; i++) {
      final Pair<Double, Double> curr = pairs[i];
      if (curr.getSecond().equals(prev.getSecond())) {
        consecutiveYTies++;
      } else {
        tiedYPairs += consecutiveYTies * (consecutiveYTies - 1) / 2;
        consecutiveYTies = 1;
      }
      prev = curr;
    }
    tiedYPairs += consecutiveYTies * (consecutiveYTies - 1) / 2;

    final long concordantMinusDiscordant =
        numPairs - tiedXPairs - tiedYPairs + tiedXYPairs - 2 * swaps;
    // Multiply in double: the product of two pair counts can exceed Long.MAX_VALUE.
    final double nonTiedPairsMultiplied =
        (numPairs - tiedXPairs) * (double) (numPairs - tiedYPairs);
    return concordantMinusDiscordant / FastMath.sqrt(nonTiedPairsMultiplied);
  }
  /**
   * Collects, for every dimension of {@code collection}, the dimension values whose summed
   * metric contribution meets a threshold, ordered by contribution.
   *
   * <p>One SQL query per dimension is generated and submitted through
   * {@code queryCache.getQueryResultAsync} before any result is awaited. For each result, the
   * values of every metric are summed per result key and across all keys. A dimension value
   * qualifies for a metric when its sum is at least {@code total * contributionThreshold}; per
   * metric, at most {@code dimensionValuesLimit} qualifying values with the largest sums are
   * kept. The list returned for a dimension contains each metric's kept values in descending
   * order of contribution, metric after metric, omitting values already listed for an earlier
   * metric.
   *
   * <p>NOTE(review): the one-line comment that used to sit above this method read "Ignored
   * already selected dimensions". Nothing in this method filters the dimension list, so that
   * remark looks stale — confirm.
   *
   * @param collection the collection whose dimensions and metrics are looked up and queried
   * @param baselineMillis baseline instant in milliseconds, handed to the SQL generator as a
   *     {@code DateTime}
   * @param currentMillis current instant in milliseconds, handed to the SQL generator as a
   *     {@code DateTime}
   * @param contributionThreshold factor applied to each metric's total to obtain the minimum
   *     sum a dimension value needs in order to be considered
   * @param dimensionValuesLimit maximum number of values kept per metric; also used as the
   *     initial capacity of each {@code PriorityQueue}
   * @return a map from dimension name to its ordered dimension values
   * @throws Exception if SQL generation, waiting on a query future, or parsing a result key
   *     fails
   */
  private Map<String, Collection<String>> retrieveDimensionValues(
      String collection,
      long baselineMillis,
      long currentMillis,
      double contributionThreshold,
      int dimensionValuesLimit)
      throws Exception {
    List<String> dimensions = getAllDimensions(collection);
    DateTime baseline = new DateTime(baselineMillis);
    DateTime current = new DateTime(currentMillis);

    // A single function string covering all metrics is reused for every per-dimension query.
    List<String> metrics = getMetrics(collection);
    String dummyFunction =
        String.format(
            DIMENSION_VALUES_OPTIONS_METRIC_FUNCTION, METRIC_FUNCTION_JOINER.join(metrics));

    MultivaluedMap<String, String> dimensionValues = new MultivaluedMapImpl();
    Map<String, Future<QueryResult>> resultFutures = new HashMap<>();
    // query w/ group by for each dimension.
    for (String dimension : dimensions) {
      // Generate SQL
      // "!" is set only for the dimension being queried and removed again right after the SQL
      // is built; presumably SqlUtils treats it as the group-by marker — TODO confirm.
      dimensionValues.put(dimension, Arrays.asList("!"));
      String sql =
          SqlUtils.getSql(dummyFunction, collection, baseline, current, dimensionValues, null);
      LOGGER.info("Generated SQL for dimension retrieval {}: {}", serverUri, sql);
      dimensionValues.remove(dimension);

      // Query (in parallel)
      resultFutures.put(dimension, queryCache.getQueryResultAsync(serverUri, sql));
    }

    Map<String, Collection<String>> collectedDimensionValues = new HashMap<>();
    // Wait for all queries and generate the ordered list from the result.
    for (int i = 0; i < dimensions.size(); i++) {
      String dimension = dimensions.get(i);
      // Blocks until this dimension's query has completed.
      QueryResult queryResult = resultFutures.get(dimension).get();

      // Sum up hourly data over entire dataset for each dimension combination
      int metricCount = metrics.size();
      double[] total = new double[metricCount];
      Map<String, double[]> summedValues = new HashMap<>();

      for (Map.Entry<String, Map<String, Number[]>> entry : queryResult.getData().entrySet()) {
        double[] sum = new double[metricCount];
        for (Map.Entry<String, Number[]> hourlyEntry : entry.getValue().entrySet()) {
          for (int j = 0; j < metricCount; j++) {
            double value = hourlyEntry.getValue()[j].doubleValue();
            sum[j] += value;
          }
        }
        summedValues.put(entry.getKey(), sum);
        // update total w/ sums for each dimension value.
        for (int j = 0; j < metricCount; j++) {
          total[j] += sum[j];
        }
      }

      // compare by value ascending (want poll to remove smallest element)
      List<PriorityQueue<Pair<String, Double>>> topNValuesByMetric =
          new ArrayList<PriorityQueue<Pair<String, Double>>>(metricCount);
      double[] threshold = new double[metricCount];
      Comparator<Pair<String, Double>> valueComparator =
          new Comparator<Pair<String, Double>>() {
            @Override
            public int compare(Pair<String, Double> a, Pair<String, Double> b) {
              return Double.compare(a.getValue().doubleValue(), b.getValue().doubleValue());
            }
          };
      // One threshold and one queue per metric, both indexed by the metric's position.
      for (int j = 0; j < metricCount; j++) {
        threshold[j] = total[j] * contributionThreshold;
        topNValuesByMetric.add(new PriorityQueue<>(dimensionValuesLimit, valueComparator));
      }

      // For each dimension value, add it only if it meets the threshold and drop an element from
      // the priority queue if over the limit.
      for (Map.Entry<String, double[]> entry : summedValues.entrySet()) {
        // The result key is parsed as a list and the element at this dimension's position is
        // taken; presumably the key lists one value per dimension in the order of
        // `dimensions` — TODO confirm against the query result format.
        List<String> combination = objectMapper.readValue(entry.getKey(), LIST_TYPE_REF);
        String dimensionValue = combination.get(i);
        for (int j = 0; j < metricCount; j++) { // metricCount == entry.getValue().length
          double dimensionValueContribution = entry.getValue()[j];
          if (dimensionValueContribution >= threshold[j]) {
            PriorityQueue<Pair<String, Double>> topNValues = topNValuesByMetric.get(j);
            topNValues.add(new Pair<>(dimensionValue, dimensionValueContribution));
            if (topNValues.size() > dimensionValuesLimit) {
              // Evicts the smallest contribution so only the largest ones remain.
              topNValues.poll();
            }
          }
        }
      }

      // Poll returns the elements in order of ascending contribution, so poll and reverse the
      // order.

      // not LinkedHashSet because we need to reverse insertion order with metrics.
      List<String> sortedValues = new ArrayList<>();
      HashSet<String> sortedValuesSet = new HashSet<>();

      for (int j = 0; j < metricCount; j++) {
        PriorityQueue<Pair<String, Double>> topNValues = topNValuesByMetric.get(j);
        // Every value of this metric is inserted at the same position, so later (larger)
        // values end up in front of earlier (smaller) ones within this metric's section.
        int startIndex = sortedValues.size();
        while (!topNValues.isEmpty()) {
          Pair<String, Double> pair = topNValues.poll();
          String dimensionValue = pair.getKey();
          // Values already listed (for this or an earlier metric) are not repeated.
          if (!sortedValuesSet.contains(dimensionValue)) {
            sortedValues.add(startIndex, dimensionValue);
            sortedValuesSet.add(dimensionValue);
          }
        }
      }

      collectedDimensionValues.put(dimension, sortedValues);
    }
    return collectedDimensionValues;
  }
 /**
  * Loads the feature data of both users of the pair and stores each result in
  * {@code usersData} under the corresponding user.
  *
  * <p>A {@code UserFeaturesDataLoader} is created per user with the shared {@code interval};
  * the first user is loaded before the second.
  *
  * @param pair the two users to load data for
  */
 private void loadFeaturesData(Pair<User, User> pair) {
   User first = pair.getFirst();
   usersData.put(first, new UserFeaturesDataLoader(first, interval).get());

   User second = pair.getSecond();
   usersData.put(second, new UserFeaturesDataLoader(second, interval).get());
 }
  /**
   * Builds the field models from a textual seed.
   *
   * <p>The seed is split into lines and lines shorter than four characters are skipped. Each
   * remaining line is read as a field name, a tab, and then a sequence of entries separated by
   * {@code "; "}. An entry is a value, a tab and a weight parsed with
   * {@link Long#parseLong(String)}. A token without a tab is treated as the beginning of a
   * value that continues in the following token(s).
   *
   * <p>The field type comes from {@code MedlineFieldDefinitions.getDefinition}; unknown field
   * names fall back to {@code SINGLE_TEXT_VALUE}. The resulting model is stored in
   * {@code fieldModels} under the field name.
   *
   * @param seed the seed text, one field per line
   * @throws IllegalStateException if an entry contains more than one tab
   */
  public MedlineGenerator(String seed) {
    String[] fields = seed.split("\\r?\\n");

    for (String fieldData : fields) {
      if (fieldData.length() < 4) {
        continue;
      }
      // The scanner is only needed while this line is parsed; close it once the line is done.
      try (Scanner scanner = new Scanner(fieldData)) {
        // Field name runs up to the first tab; afterwards entries are separated by "; ".
        scanner.useDelimiter("\\t");
        String fieldName = scanner.next();
        scanner.useDelimiter("; ");
        // Consume the tab that follows the field name so it is not part of the first entry.
        scanner.findInLine("\t");

        MedlineFieldDefinition defn = MedlineFieldDefinitions.getDefinition(fieldName);

        MedlineFieldDefinition.FieldType fieldType =
            defn != null ? defn.type : MedlineFieldDefinition.FieldType.SINGLE_TEXT_VALUE;

        BaseFieldModel fieldModel = null;

        // Single-pass view over the remaining entries of this line as (weight, value) pairs.
        Iterable<Pair<Long, String>> scannerIterator =
            new Iterable<Pair<Long, String>>() {
              @Override
              public Iterator<Pair<Long, String>> iterator() {
                return new Iterator<Pair<Long, String>>() {
                  @Override
                  public boolean hasNext() {
                    return scanner.hasNext();
                  }

                  @Override
                  public Pair<Long, String> next() {
                    String value = "";
                    String[] data;
                    // Keep reading tokens until one carries the tab-separated weight.
                    // NOTE(review): the pieces are concatenated without the "; " that
                    // separated them — confirm that dropping the delimiter is intended.
                    do {
                      String next = scanner.next();
                      data = next.split("\\t");
                      if (data.length > 2) {
                        throw new IllegalStateException(
                            String.format("Cannot parse word: '%s'", value + next));
                      }
                      value += data[0];
                    } while (data.length < 2);
                    return new Pair<>(Long.parseLong(data[1]), value);
                  }

                  @Override
                  public void remove() {
                    throw new NotImplementedException();
                  }
                };
              }
            };

        switch (fieldType) {
          case ARRAY_TEXT_VALUES:
          case SINGLE_TEXT_VALUE:
          case WORDS:
            SimpleFieldModel model = new SimpleFieldModel(fieldName, fieldType);
            for (Pair<Long, String> pair : scannerIterator) {
              model.addValue(pair.getKey(), pair.getValue());
            }
            fieldModel = model;
            break;
          case SINGLE_OBJECT_VALUE:
            ObjectFieldModel objectModel = new ObjectFieldModel(fieldName);
            for (Pair<Long, String> pair : scannerIterator) {
              long weight = pair.getKey();
              String propertyData = pair.getValue();
              int firstColonIndex = propertyData.indexOf(':');
              String propertyName = propertyData.substring(0, firstColonIndex);
              // NOTE(review): the value starts AT the colon, so it keeps the leading ':' —
              // confirm whether firstColonIndex + 1 was intended.
              String propertyValue = propertyData.substring(firstColonIndex);
              objectModel.addValue(propertyName, weight, propertyValue);
            }
            fieldModel = objectModel;
            break;
        }

        // NOTE(review): for a field type not handled above, fieldModel is still null here.
        fieldModels.put(fieldName, fieldModel);
      }
    }
  }