@Override public void trainMostSimilar(List<EnsembleSim> simList) { if (simList.isEmpty()) { throw new IllegalStateException("no examples to train on!"); } mostSimilarInterpolator.trainMostSimilar(simList); // Remove things that have no observed metrics List<EnsembleSim> pruned = new ArrayList<EnsembleSim>(); for (EnsembleSim es : simList) { if (es != null && es.getNumMetricsWithScore() > 0) { pruned.add(es); } } double[][] X = new double[pruned.size()][numMetrics * 2]; double[] Y = new double[pruned.size()]; for (int i = 0; i < pruned.size(); i++) { Y[i] = pruned.get(i).knownSim.similarity; EnsembleSim es = mostSimilarInterpolator.interpolate(pruned.get(i)); for (int j = 0; j < numMetrics; j++) { X[i][2 * j] = es.getScores().get(j); X[i][2 * j + 1] = Math.log(es.getRanks().get(j) + 1); } } OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression(); regression.newSampleData(Y, X); mostSimilarCoefficients = new TDoubleArrayList(regression.estimateRegressionParameters()); double pearson = Math.sqrt(regression.calculateRSquared()); LOG.info("coefficients are " + mostSimilarCoefficients.toString()); LOG.info("pearson for multiple regression is " + pearson); }
/**
 * Trains the similarity model: first trains the similarity interpolator on
 * the examples, then fits a multiple linear regression with one column per
 * metric (the interpolated score) against each example's known similarity.
 *
 * The estimated parameters are stored in {@code simlarityCoefficients}, and
 * both the parameters and the square root of the regression's R-squared are
 * logged. Unlike filtering variants, every entry of {@code simList} is used
 * as a regression row.
 *
 * @param simList training examples; must not be empty
 * @throws IllegalArgumentException if {@code simList} is empty
 */
@Override
public void trainSimilarity(List<EnsembleSim> simList) {
    if (simList.isEmpty()) {
        throw new IllegalArgumentException("no examples to train on!");
    }
    similarityInterpolator.trainSimilarity(simList);

    final int numExamples = simList.size();
    double[][] features = new double[numExamples][numMetrics];
    double[] targets = new double[numExamples];

    for (int row = 0; row < numExamples; row++) {
        EnsembleSim example = simList.get(row);
        targets[row] = example.knownSim.similarity;

        // Same element is re-fetched so the list is accessed exactly as before.
        EnsembleSim filled = similarityInterpolator.interpolate(simList.get(row));
        double[] featureRow = features[row];
        for (int m = 0; m < numMetrics; m++) {
            featureRow[m] = filled.getScores().get(m);
        }
    }

    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(targets, features);
    simlarityCoefficients = new TDoubleArrayList(ols.estimateRegressionParameters());
    double multipleCorrelation = Math.sqrt(ols.calculateRSquared());

    LOG.info("coefficients are " + simlarityCoefficients.toString());
    LOG.info("pearson for multiple regression is " + multipleCorrelation);
}