/** * Convert data to probability co-occurrences (aka calculating the kernel) * * @param d the data to convert * @param u the perplexity of the model * @return the probabilities of co-occurrence */ public INDArray computeGaussianPerplexity(final INDArray d, double u) { int n = d.rows(); final INDArray p = zeros(n, n); final INDArray beta = ones(n, 1); final double logU = Math.log(u); log.info("Calculating probabilities of data similarities.."); for (int i = 0; i < n; i++) { if (i % 500 == 0 && i > 0) log.info("Handled " + i + " records"); double betaMin = Double.NEGATIVE_INFINITY; double betaMax = Double.POSITIVE_INFINITY; int[] vals = Ints.concat(ArrayUtil.range(0, i), ArrayUtil.range(i + 1, d.columns())); INDArrayIndex[] range = new INDArrayIndex[] {new NDArrayIndex(vals)}; INDArray row = d.slice(i).get(range); Pair<INDArray, INDArray> pair = hBeta(row, beta.getDouble(i)); INDArray hDiff = pair.getFirst().sub(logU); int tries = 0; // while hdiff > tolerance while (BooleanIndexing.and(abs(hDiff), Conditions.greaterThan(tolerance)) && tries < 50) { // if hdiff > 0 if (BooleanIndexing.and(hDiff, Conditions.greaterThan(0))) { if (Double.isInfinite(betaMax)) beta.putScalar(i, beta.getDouble(i) * 2.0); else beta.putScalar(i, (beta.getDouble(i) + betaMax) / 2.0); betaMin = beta.getDouble(i); } else { if (Double.isInfinite(betaMin)) beta.putScalar(i, beta.getDouble(i) / 2.0); else beta.putScalar(i, (beta.getDouble(i) + betaMin) / 2.0); betaMax = beta.getDouble(i); } pair = hBeta(row, beta.getDouble(i)); hDiff = pair.getFirst().subi(logU); tries++; } p.slice(i).put(range, pair.getSecond()); } // dont need data in memory after log.info("Mean value of sigma " + sqrt(beta.rdiv(1)).mean(Integer.MAX_VALUE)); BooleanIndexing.applyWhere(p, Conditions.isNan(), new Value(realMin)); // set 0 along the diagonal INDArray permute = p.transpose(); INDArray pOut = p.add(permute); pOut.divi(pOut.sum(Integer.MAX_VALUE)); BooleanIndexing.applyWhere( pOut, Conditions.lessThan(Nd4j.EPS_THRESHOLD), new Value(Nd4j.EPS_THRESHOLD)); // ensure no nans return pOut; }