/**
 * Scores the given test documents by running the external lda inference
 * command and combining its output with {@code betaMatrix}.
 *
 * <p>Steps, in order: write the test documents to {@code lda/test.dat},
 * invoke the lda binary in {@code inf} mode, read the matrix from
 * {@code lda/output-gamma.dat}, subtract the alpha value read from
 * {@code lda/final.other} from every entry, then multiply the adjusted
 * matrix by {@code betaMatrix}.
 *
 * @param testDocs documents handed to {@code createLdaInputTest}
 * @return the product matrix copied into a plain array, one row per row of
 *         the gamma file (presumably one per test document; confirm against
 *         the inference tool's output format)
 */
@Override
public double[][] predict(List<PredictionPaper> testDocs) {
    String testData = "lda/test.dat";
    createLdaInputTest(testData, testDocs);

    // NOTE(review): the meaning of the boolean flag is defined by
    // Utils.runCommand, which is not visible here.
    String inferenceCommand = "lib/lda-c-dist/lda inf "
            + " lib/lda-c-dist/settings.txt "
            + "lda/final "
            + testData
            + " lda/output";
    Utils.runCommand(inferenceCommand, false);

    double[][] gammaValues = Utils.readMatrix("lda/output-gamma.dat", false);
    double alpha = Utils.readAlpha("lda/final.other");

    // Shift every gamma entry down by alpha, in place.
    for (double[] gammaRow : gammaValues) {
        for (int k = 0; k < gammaRow.length; k++) {
            gammaRow[k] -= alpha;
        }
    }

    SimpleMatrix gammas = new SimpleMatrix(gammaValues);
    SimpleMatrix beta = new SimpleMatrix(betaMatrix);
    SimpleMatrix product = gammas.mult(beta);

    // Copy the product out of the matrix wrapper into a plain 2-D array.
    int rowCount = product.numRows();
    int colCount = product.numCols();
    double[][] scores = new double[rowCount][colCount];
    for (int r = 0; r < rowCount; r++) {
        double[] scoreRow = scores[r];
        for (int c = 0; c < colCount; c++) {
            scoreRow[c] = product.get(r, c);
        }
    }
    return scores;
}
/**
 * Runs the external k-means script on the training set and stages its
 * output under the {@code lda} directory.
 *
 * <p>Steps, in order: create the {@code kmeans} directory if it is missing,
 * write the training documents to {@code kmeans/documents}, run
 * {@code kmeans.py}, load {@code kmeans/centers} into {@code betaMatrix},
 * copy the centers file to {@code lda/final.beta}, then generate
 * {@code kmeans/final.other} and copy it into {@code lda}.
 */
private void train() {
    File kmeansDir = new File("kmeans");
    if (!kmeansDir.exists()) {
        kmeansDir.mkdir();
    }

    createKmeansInput("kmeans/documents", trainingSet);

    // NOTE(review): the meaning of the boolean flag is defined by
    // Utils.runCommand, which is not visible here.
    String clusterCommand = "python src/plusone/clustering/kmeans.py "
            + "kmeans/documents -k "
            + numTopics
            + " -m cosine "
            + "-w kmeans -i 50 -q";
    Utils.runCommand(clusterCommand, true);

    betaMatrix = Utils.readMatrix("kmeans/centers", false);

    System.out.print("moving kmeans beta to lda folder...");

    // The centers file is copied into lda and then renamed to final.beta.
    Utils.runCommand("cp kmeans/centers lda", false);
    Utils.runCommand("mv lda/centers lda/final.beta", true);

    // Generate the final.other file next to the k-means output, then copy it into lda.
    Utils.createLdaInfo("kmeans/final.other", numTopics, terms.size(), synthetic);
    Utils.runCommand("cp kmeans/final.other lda", true);

    System.out.println("done.");
}