@Override public void trainMostSimilar(List<EnsembleSim> simList) { if (simList.isEmpty()) { throw new IllegalStateException("no examples to train on!"); } mostSimilarInterpolator.trainMostSimilar(simList); // Remove things that have no observed metrics List<EnsembleSim> pruned = new ArrayList<EnsembleSim>(); for (EnsembleSim es : simList) { if (es != null && es.getNumMetricsWithScore() > 0) { pruned.add(es); } } double[][] X = new double[pruned.size()][numMetrics * 2]; double[] Y = new double[pruned.size()]; for (int i = 0; i < pruned.size(); i++) { Y[i] = pruned.get(i).knownSim.similarity; EnsembleSim es = mostSimilarInterpolator.interpolate(pruned.get(i)); for (int j = 0; j < numMetrics; j++) { X[i][2 * j] = es.getScores().get(j); X[i][2 * j + 1] = Math.log(es.getRanks().get(j) + 1); } } OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression(); regression.newSampleData(Y, X); mostSimilarCoefficients = new TDoubleArrayList(regression.estimateRegressionParameters()); double pearson = Math.sqrt(regression.calculateRSquared()); LOG.info("coefficients are " + mostSimilarCoefficients.toString()); LOG.info("pearson for multiple regression is " + pearson); }
/**
 * Samples {@code curve} at the evenly spaced positions {@code min + step * i} and returns
 * the samples as doubles.
 *
 * <p>When {@code result} is {@code null} a new array of length
 * {@code (int) ((max - min) / step)} is allocated. Otherwise the supplied array is filled
 * over its whole length and {@code max} is not consulted.
 *
 * @param min position of the first sample
 * @param max upper bound used only to size a newly allocated array
 * @param step distance between consecutive sample positions
 * @param curve curve data handed unchanged to {@code Interpolator.interpolate}
 * @param result array to fill, or {@code null} to allocate one
 * @return the filled array ({@code result} itself when it was non-null)
 */
public static double[] interpolate(
        double min, double max, double step, double curve[][], double result[]) {
    final Interpolator sampler = new Interpolator();

    double[] samples = result;
    if (samples == null) {
        samples = new double[(int) ((max - min) / step)];
    }

    int index = 0;
    while (index < samples.length) {
        samples[index] = sampler.interpolate(min + step * index, curve);
        index++;
    }
    return samples;
}
/**
 * Samples {@code curve} at the evenly spaced positions {@code min + step * i} and returns
 * the samples quantised to 16 bits.
 *
 * <p>Each interpolated value is multiplied by {@code 0xffff}, rounded by adding
 * {@code 0.5} and truncating to {@code int}, masked to its low 16 bits and stored in a
 * {@code short}. Presumably the interpolated values are expected to lie in [0, 1] so the
 * result spans the unsigned 16-bit range; confirm against the callers.
 *
 * <p>When {@code result} is {@code null} a new array of length
 * {@code (int) ((max - min) / step)} is allocated. Otherwise the supplied array is filled
 * over its whole length and {@code max} is not consulted.
 *
 * @param min position of the first sample
 * @param max upper bound used only to size a newly allocated array
 * @param step distance between consecutive sample positions
 * @param curve curve data handed unchanged to {@code Interpolator.interpolate}
 * @param result array to fill, or {@code null} to allocate one
 * @return the filled array ({@code result} itself when it was non-null)
 */
public static short[] interpolate(
        double min, double max, double step, double curve[][], short result[]) {
    final Interpolator sampler = new Interpolator();

    short[] samples = result;
    if (samples == null) {
        samples = new short[(int) ((max - min) / step)];
    }

    int index = 0;
    while (index < samples.length) {
        // Scale and round in a single expression so numeric promotion is unchanged.
        int scaled = (int) (sampler.interpolate(min + step * index, curve) * 0xffff + 0.5);
        samples[index] = (short) (scaled & 0xffff);
        index++;
    }
    return samples;
}
/**
 * Fits the linear model used for pairwise similarity.
 *
 * <p>The interpolator is trained on the list, then every example contributes one
 * regression row holding the interpolated score of each metric, with the example's known
 * similarity as the target. The fitted parameters are stored in
 * {@code simlarityCoefficients}, and the coefficients plus the square root of R-squared
 * are logged.
 *
 * @param simList training examples; must not be empty
 * @throws IllegalArgumentException if {@code simList} is empty
 */
@Override
public void trainSimilarity(List<EnsembleSim> simList) {
    if (simList.isEmpty()) {
        throw new IllegalArgumentException("no examples to train on!");
    }
    similarityInterpolator.trainSimilarity(simList);

    final int rowCount = simList.size();
    double[][] features = new double[rowCount][numMetrics];
    double[] targets = new double[rowCount];

    for (int row = 0; row < rowCount; row++) {
        targets[row] = simList.get(row).knownSim.similarity;
        EnsembleSim filled = similarityInterpolator.interpolate(simList.get(row));
        double[] featureRow = features[row];
        for (int metric = 0; metric < numMetrics; metric++) {
            featureRow[metric] = filled.getScores().get(metric);
        }
    }

    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(targets, features);
    simlarityCoefficients = new TDoubleArrayList(ols.estimateRegressionParameters());

    double pearson = Math.sqrt(ols.calculateRSquared());
    LOG.info("coefficients are " + simlarityCoefficients.toString());
    LOG.info("pearson for multiple regression is " + pearson);
}
/** * Returns the next character in the filtered stream, replacing tokens from the original stream. * * @return the next character in the resulting stream, or -1 if the end of the resulting stream * has been reached * @exception IOException if the underlying stream throws an IOException during reading */ public int read() throws IOException { if (replaceIndex != -1 && replaceIndex < replaceData.length()) { int ch = replaceData.charAt(replaceIndex++); if (replaceIndex >= replaceData.length()) { replaceIndex = -1; } return ch; } int ch = -1; if (previousIndex != -1 && previousIndex < this.endToken.length()) { ch = this.endToken.charAt(previousIndex++); } else { ch = in.read(); } if (ch == this.beginToken.charAt(0)) { StringBuffer key = new StringBuffer(); key.append((char) ch); int beginTokenMatchPos = 1; do { if (previousIndex != -1 && previousIndex < this.endToken.length()) { ch = this.endToken.charAt(previousIndex++); } else { ch = in.read(); } if (ch != -1) { key.append((char) ch); if ((beginTokenMatchPos < this.beginToken.length()) && (ch != this.beginToken.charAt(beginTokenMatchPos++))) { ch = -1; // not really EOF but to trigger code below break; } } else { break; } } while (ch != this.endToken.charAt(0)); // now test endToken if (ch != -1 && this.endToken.length() > 1) { int endTokenMatchPos = 1; do { if (previousIndex != -1 && previousIndex < this.endToken.length()) { ch = this.endToken.charAt(previousIndex++); } else { ch = in.read(); } if (ch != -1) { key.append((char) ch); if (ch != this.endToken.charAt(endTokenMatchPos++)) { ch = -1; // not really EOF but to trigger code below break; } } else { break; } } while (endTokenMatchPos < this.endToken.length()); } // There is nothing left to read so we have the situation where the begin/end token // are in fact the same and as there is nothing left to read we have got ourselves // end of a token boundary so let it pass through. 
if (ch == -1) { replaceData = key.toString(); replaceIndex = 1; return replaceData.charAt(0); } String value; try { value = interpolator.interpolate(key.toString(), ""); } catch (InterpolationException e) { IllegalArgumentException error = new IllegalArgumentException(e.getMessage()); error.initCause(e); throw error; } if (value != null) { if (value.length() != 0) { replaceData = value; replaceIndex = 0; } return read(); } else { previousIndex = 0; replaceData = key.substring(0, key.length() - this.endToken.length()); replaceIndex = 0; return this.beginToken.charAt(0); } } return ch; }