/**
 * Builds a vector holding, for each encoded output component, the value
 * returned by {@code generalizedCorrelationRatio} against the whole encoded
 * input.
 *
 * <p>The first sample of the iterator is read only to discover the encoded
 * input and output dimensions; the iterator is reset before every
 * per-component computation.
 *
 * @param it source of samples; it is reset several times by this method
 * @return a vector with one entry per output component, or {@code null} as
 *     soon as one component yields a negative ratio
 */
@Override
public RealVector computeSyntheticValues(SampleIterator it) {
  // Peek at the first sample to learn the encoded dimensions.
  it.reset();
  final Sample first = it.next();
  final int inputSize = first.getEncodedInput().getDimension();
  final int outputSize = first.getEncodedOutput().getDimension();

  final RealVector ratios = new ArrayRealVector(outputSize);
  int component = 0;
  while (component < outputSize) {
    it.reset();
    final double ratio = generalizedCorrelationRatio(it, inputSize, component);
    if (ratio < 0) {
      // A negative value is the failure marker of the helper.
      return null;
    }
    ratios.setEntry(component, ratio);
    component++;
  }
  return ratios;
}
/**
 * Builds the input/output correlation matrix: entry {@code (i, j)} is the
 * value returned by {@code correlationRatio} for encoded input component
 * {@code i} and encoded output component {@code j}.
 *
 * <p>The first sample of the iterator is read only to discover the encoded
 * dimensions; the iterator is reset before each pairwise computation.
 *
 * @param it source of samples; it is reset several times by this method
 * @return a matrix with one row per input component and one column per
 *     output component
 */
@Override
public RealMatrix computeIOCorrelationMatrix(SampleIterator it) {
  it.reset();
  final Sample first = it.next();
  final int rows = first.getEncodedInput().getDimension();
  final int cols = first.getEncodedOutput().getDimension();

  final RealMatrix correlations = new Array2DRowRealMatrix(rows, cols);
  for (int row = 0; row < rows; row++) {
    for (int col = 0; col < cols; col++) {
      // Every pair needs a fresh pass over the samples.
      it.reset();
      final double ratio = correlationRatio(it, row, col);
      correlations.setEntry(row, col, ratio);
    }
  }
  return correlations;
}
/**
 * Computes the correlation ratio (eta) between one encoded input component
 * and one encoded output component.
 *
 * <p>Every distinct output value is treated as a category. With {@code n_y}
 * the number of samples in category {@code y}, {@code mean_y} the mean input
 * of that category and {@code mean} the overall mean input, the result is
 * {@code sqrt(sum_y n_y * (mean_y - mean)^2 / sum_i (x_i - mean)^2)}.
 *
 * <p>Samples are read from the iterator's current position (this method does
 * not reset it) until it is exhausted or {@code maxSamples} have been read.
 * Each input value is passed through {@code injectNoise} before being used.
 *
 * <p>Values are accumulated in lists and converted to arrays once, instead of
 * growing a {@code RealVector} with {@code append} (which copies the whole
 * vector on every sample).
 *
 * @param it sample source, read from its current position
 * @param in index of the encoded input component
 * @param out index of the encoded output component
 * @return the correlation ratio, or {@code 0} when the inputs have no
 *     dispersion around their mean (including when no sample was read)
 */
private double correlationRatio(SampleIterator it, int in, int out) {
  // Noisy inputs grouped by the output value they were observed with.
  Map<Double, List<Double>> inputsByOutput = new HashMap<>();
  // All noisy inputs, in read order.
  List<Double> allInputs = new ArrayList<>();

  for (int i = 0; i < maxSamples && it.hasNext(); i++) {
    Sample sample = it.next();
    double input = sample.getEncodedInput().getEntry(in);
    double output = sample.getEncodedOutput().getEntry(out);

    List<Double> group = inputsByOutput.get(output);
    if (group == null) {
      group = new ArrayList<>();
      inputsByOutput.put(output, group);
    }

    input = injectNoise(input);
    group.add(input);
    allInputs.add(input);
  }

  double[] x = toPrimitiveArray(allInputs);
  double x_mean = StatUtils.mean(x);

  // Between-category dispersion, weighted by category size.
  double numerator = 0;
  for (Entry<Double, List<Double>> entry : inputsByOutput.entrySet()) {
    List<Double> group = entry.getValue();
    double deviation = StatUtils.mean(toPrimitiveArray(group)) - x_mean;
    numerator += group.size() * deviation * deviation;
  }

  // Total dispersion of the inputs around the overall mean.
  double denominator = 0;
  for (double e : x) {
    denominator += (e - x_mean) * (e - x_mean);
  }

  return denominator == 0 ? 0 : Math.sqrt(numerator / denominator);
}

/** Copies a list of boxed doubles into a primitive array, preserving order. */
private double[] toPrimitiveArray(List<Double> values) {
  double[] array = new double[values.size()];
  int index = 0;
  for (double value : values) {
    array[index++] = value;
  }
  return array;
}
/**
 * Computes a multivariate ("generalized") correlation ratio between the whole
 * encoded input vector and one encoded output component.
 *
 * <p>Every distinct output value is treated as a group. From the (noisy)
 * input vectors the method builds two {@code inputDim x inputDim} matrices:
 * <ul>
 *   <li>{@code H = sum_g (s_g s_g^T) / n_g - (s s^T) / N}, where {@code s_g}
 *       and {@code n_g} are the per-group sum and count, and {@code s},
 *       {@code N} the overall sum and count;</li>
 *   <li>{@code E = sum_i (x_i x_i^T) - sum_g (s_g s_g^T) / n_g}.</li>
 * </ul>
 * Columns reported by {@code findZeroColumns(E)} are removed from both
 * matrices via {@code removeZeroColumns} before further processing.
 *
 * <p>A scalar {@code lambda} is then obtained either as the product of
 * {@code 1 / (1 + eig)} over the largest eigenvalues of {@code E^-1 H}
 * (when {@code useEigenvalues} is set) or as {@code det(E) / det(E + H)},
 * which is the form of Wilks' lambda. The result is {@code sqrt(1 - lambda)}.
 *
 * <p>Samples are read from the iterator's current position (this method does
 * not reset it) until it is exhausted or {@code maxSamples} have been read.
 *
 * @param it sample source, read from its current position
 * @param inputDim dimension of the encoded input vectors
 * @param out index of the encoded output component
 * @return {@code sqrt(1 - lambda)}, or {@code -1} when {@code E} is rank
 *     deficient, when one of the considered eigenvalues is below
 *     {@code zeroThreshold} in absolute value, or when {@code E + H} is rank
 *     deficient
 */
private double generalizedCorrelationRatio(SampleIterator it, int inputDim, int out) {
  // Per-group running statistics, keyed by the observed output value.
  Map<Double, MultivariateSummaryStatistics> stat_y = new HashMap<>();
  // Every noisy input kept as a column matrix for the outer products below.
  List<RealMatrix> x = new ArrayList<>();
  MultivariateSummaryStatistics stat = new MultivariateSummaryStatistics(inputDim, unbiased);

  for (int i = 0; i < maxSamples && it.hasNext(); i++) {
    Sample sample = it.next();
    double[] input = sample.getEncodedInput().toArray();
    double output = sample.getEncodedOutput().getEntry(out);

    MultivariateSummaryStatistics groupStat = stat_y.get(output);
    if (groupStat == null) {
      groupStat = new MultivariateSummaryStatistics(inputDim, unbiased);
      stat_y.put(output, groupStat);
    }

    // The return value is not used: presumably the array is modified in
    // place by the helper (defined elsewhere in this class).
    injectNoise(input);
    groupStat.addValue(input);
    x.add(new Array2DRowRealMatrix(input));
    stat.addValue(input);
  }

  RealMatrix x_sum = new Array2DRowRealMatrix(stat.getSum());

  // sum_g (s_g s_g^T) / n_g, shared by H and E.
  RealMatrix groupTerm = new Array2DRowRealMatrix(inputDim, inputDim);
  for (MultivariateSummaryStatistics groupStat : stat_y.values()) {
    RealMatrix groupSum = new Array2DRowRealMatrix(groupStat.getSum());
    groupTerm =
        groupTerm.add(
            groupSum.multiply(groupSum.transpose()).scalarMultiply(1.0 / groupStat.getN()));
  }

  RealMatrix H =
      groupTerm.subtract(x_sum.multiply(x_sum.transpose()).scalarMultiply(1.0 / x.size()));

  RealMatrix E = new Array2DRowRealMatrix(inputDim, inputDim);
  for (RealMatrix m : x) {
    E = E.add(m.multiply(m.transpose()));
  }
  E = E.subtract(groupTerm);

  List<Integer> zeroColumns = findZeroColumns(E);
  E = removeZeroColumns(E, zeroColumns);
  H = removeZeroColumns(H, zeroColumns);

  Matrix JE = new Matrix(E.getData());
  Matrix JH = new Matrix(H.getData());

  if (JE.rank() < JE.getRowDimension()) {
    Log.write(this, "Some error occurred (E matrix is singular)");
    return -1;
  }

  double lambda;
  if (useEigenvalues) {
    Matrix L = JE.inverse().times(JH);
    double[] eigs = L.eig().getRealEigenvalues();
    Arrays.sort(eigs);
    lambda = 1;
    int nonNullEigs = stat_y.size() - 1;
    // Only the largest (groups - 1) eigenvalues are considered, but there
    // are never more than eigs.length of them: clamp the start index so
    // that many distinct output values cannot produce a negative index.
    int firstIndex = Math.max(0, eigs.length - nonNullEigs);
    for (int i = firstIndex; i < eigs.length; i++) {
      if (Math.abs(eigs[i]) < zeroThreshold) {
        Log.write(this, "Some error occurred (E matrix has too many null eigenvalues)");
        return -1;
      }
      lambda *= 1.0 / (1.0 + eigs[i]);
    }
  } else {
    Matrix sum = JE.plus(JH);
    if (sum.rank() < sum.getRowDimension()) {
      Log.write(this, "Some error occourred (E+H is singular");
      return -1;
    }
    lambda = JE.det() / sum.det();
  }
  return Math.sqrt(1 - lambda);
}