예제 #1
0
  /**
   * Fits the linear model used to combine per-metric results for most-similar queries.
   *
   * <p>The most-similar interpolator is trained on the full {@code simList} first. Examples that
   * are {@code null} or report no metric with a score are then dropped. Each remaining example
   * is interpolated and turned into one regression row that holds, for every metric, its score
   * followed by {@code log(rank + 1)}; the example's known similarity is the regression target.
   * The estimated regression parameters are stored in {@code mostSimilarCoefficients}, and the
   * coefficients plus the square root of R-squared are logged.
   *
   * @param simList training examples; must not be empty
   * @throws IllegalStateException if {@code simList} is empty, or if no example remains after
   *     examples without observed metrics have been removed
   */
  @Override
  public void trainMostSimilar(List<EnsembleSim> simList) {
    if (simList.isEmpty()) {
      throw new IllegalStateException("no examples to train on!");
    }
    mostSimilarInterpolator.trainMostSimilar(simList);

    // Remove things that have no observed metrics
    List<EnsembleSim> pruned = new ArrayList<EnsembleSim>();
    for (EnsembleSim es : simList) {
      if (es != null && es.getNumMetricsWithScore() > 0) {
        pruned.add(es);
      }
    }
    // Nothing survived pruning: fail with a clear message instead of handing the
    // regression an empty sample.
    if (pruned.isEmpty()) {
      throw new IllegalStateException("no examples with observed metrics to train on!");
    }

    // Two features per metric: the score at column 2*j and log(rank + 1) at column 2*j + 1.
    double[][] X = new double[pruned.size()][numMetrics * 2];
    double[] Y = new double[pruned.size()];
    for (int i = 0; i < pruned.size(); i++) {
      EnsembleSim example = pruned.get(i);
      Y[i] = example.knownSim.similarity;
      EnsembleSim es = mostSimilarInterpolator.interpolate(example);
      for (int j = 0; j < numMetrics; j++) {
        X[i][2 * j] = es.getScores().get(j);
        X[i][2 * j + 1] = Math.log(es.getRanks().get(j) + 1);
      }
    }

    OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
    regression.newSampleData(Y, X);

    mostSimilarCoefficients = new TDoubleArrayList(regression.estimateRegressionParameters());
    double pearson = Math.sqrt(regression.calculateRSquared());
    LOG.info("coefficients are " + mostSimilarCoefficients.toString());
    LOG.info("pearson for multiple regression is " + pearson);
  }
예제 #2
0
  /**
   * Fits the linear model used to combine per-metric scores for pairwise similarity.
   *
   * <p>The similarity interpolator is trained on {@code simList}; every example is then
   * interpolated and contributes one regression row containing one score per metric, with the
   * example's known similarity as the target. The estimated regression parameters are stored in
   * {@code simlarityCoefficients}, and the coefficients plus the square root of R-squared are
   * logged.
   *
   * @param simList training examples; must not be empty
   * @throws IllegalArgumentException if {@code simList} is empty
   */
  @Override
  public void trainSimilarity(List<EnsembleSim> simList) {
    if (simList.isEmpty()) {
      throw new IllegalArgumentException("no examples to train on!");
    }
    similarityInterpolator.trainSimilarity(simList);

    final int numExamples = simList.size();
    double[][] features = new double[numExamples][numMetrics];
    double[] targets = new double[numExamples];

    int row = 0;
    for (EnsembleSim example : simList) {
      // The target is read before interpolating, matching the per-example order of operations.
      targets[row] = example.knownSim.similarity;
      EnsembleSim interpolated = similarityInterpolator.interpolate(example);
      double[] featureRow = features[row];
      for (int metric = 0; metric < numMetrics; metric++) {
        featureRow[metric] = interpolated.getScores().get(metric);
      }
      row++;
    }

    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(targets, features);

    simlarityCoefficients = new TDoubleArrayList(ols.estimateRegressionParameters());
    double multipleCorrelation = Math.sqrt(ols.calculateRSquared());
    LOG.info("coefficients are " + simlarityCoefficients.toString());
    LOG.info("pearson for multiple regression is " + multipleCorrelation);
  }