/**
 * Train an LDA topic model on the given documents using MALLET's
 * ParallelTopicModel. alphaSum is the symmetric Dirichlet prior summed
 * over all topics (per-topic alpha * numTopics).
 */
public void trainDocuments(InstanceList documents, int numTopics,
        int numIterations, double alpha, double beta) {
    double alphaSum = alpha * numTopics;
    lda = new ParallelTopicModel(numTopics, alphaSum, beta);
    lda.addInstances(documents);
    lda.setNumThreads(4);
    lda.setNumIterations(numIterations);
    lda.printLogLikelihood = false;
    try {
        lda.estimate();
    } catch (IOException e) {
        e.printStackTrace();
    }
    System.out.println("LDA parameters, alphaSum: " + lda.alphaSum
            + ", beta: " + lda.beta);
}
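// A minimal sketch of building the InstanceList that trainDocuments expects,
// using MALLET's standard import pipeline. The tab-separated input format
// (<name>\t<label>\t<text>) and the token regex are assumptions; adjust them
// to the actual corpus. Assumed imports: cc.mallet.pipe.*,
// cc.mallet.pipe.iterator.CsvIterator, cc.mallet.types.InstanceList,
// java.io.*, java.util.*, java.util.regex.Pattern.
public InstanceList buildInstances(String tsvFile) throws IOException {
    ArrayList<Pipe> pipes = new ArrayList<Pipe>();
    pipes.add(new CharSequenceLowercase());
    pipes.add(new CharSequence2TokenSequence(Pattern.compile("\\p{L}+")));
    pipes.add(new TokenSequence2FeatureSequence());
    InstanceList instances = new InstanceList(new SerialPipes(pipes));
    // Each line of the file: <name>\t<label>\t<text>
    instances.addThruPipe(new CsvIterator(new FileReader(tsvFile),
            Pattern.compile("^(\\S*)[\\s,]*(\\S*)[\\s,]*(.*)$"), 3, 2, 1));
    return instances;
}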
/**
 * Cosine similarity between a query document's topic distribution
 * (taken from the trained model) and a test document's inferred
 * distribution.
 */
protected double queryVsmSimilarity(int qdocId, int targetDocId) {
    double[] v1 = lda.getTopicProbabilities(qdocId);
    double[] v2 = testTopicDistribution[targetDocId];
    return rs.util.vlc.Util.cosineProduct(v1, v2);
}
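// rs.util.vlc.Util.cosineProduct is project-specific and not shown here; a
// plausible implementation (an assumption, not the project's actual code) is
// plain cosine similarity over the two topic-distribution vectors:
private static double cosineProduct(double[] v1, double[] v2) {
    double dot = 0, norm1 = 0, norm2 = 0;
    for (int i = 0; i < v1.length; i++) {
        dot += v1[i] * v2[i];
        norm1 += v1[i] * v1[i];
        norm2 += v2[i] * v2[i];
    }
    if (norm1 == 0 || norm2 == 0) return 0; // guard against zero vectors
    return dot / (Math.sqrt(norm1) * Math.sqrt(norm2));
}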
/**
 * Infer a topic distribution for every test document using the
 * inferencer derived from the trained model. Must run after
 * trainDocuments.
 */
public void generateTestInference() {
    if (lda == null) {
        System.err.println("Run the LDA estimation first.");
        System.exit(1);
    }
    if (testTopicDistribution == null)
        testTopicDistribution = new double[test.size()][];
    TopicInferencer infer = lda.getInferencer();
    int iterations = 800;
    int thinning = 5;
    int burnIn = 100;
    for (int ti = 0; ti < test.size(); ti++) {
        testTopicDistribution[ti] = infer.getSampledDistribution(
                test.get(ti), iterations, thinning, burnIn);
    }
}
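// Putting the pieces together: train on one InstanceList, infer topic
// distributions for the held-out test set, then score a query document
// against a test document. The field names (lda, test, testTopicDistribution)
// come from this class; the method below and its hyperparameter values are
// an illustrative sketch only.
public void runExample(InstanceList train, InstanceList testSet) {
    this.test = testSet;
    // alpha = 1.0 with 50 topics gives alphaSum = 50, a common MALLET default.
    trainDocuments(train, 50, 1000, 1.0, 0.01);
    generateTestInference();
    double sim = queryVsmSimilarity(0, 0); // query doc 0 vs. test doc 0
    System.out.println("cosine similarity: " + sim);
}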