public void doInference() { try { ParallelTopicModel model = ParallelTopicModel.read(new File(inferencerFile)); TopicInferencer inferencer = model.getInferencer(); // TopicInferencer inferencer = // TopicInferencer.read(new File(inferencerFile)); // InstanceList testing = readFile(); readFile(); InstanceList testing = generateInstanceList(); // readFile(); for (int i = 0; i < testing.size(); i++) { StringBuilder probabilities = new StringBuilder(); double[] testProbabilities = inferencer.getSampledDistribution(testing.get(i), 10, 1, 5); ArrayList probabilityList = new ArrayList(); for (int j = 0; j < testProbabilities.length; j++) { probabilityList.add(new Pair<Integer, Double>(j, testProbabilities[j])); } Collections.sort(probabilityList, new CustomComparator()); for (int j = 0; j < testProbabilities.length && j < topN; j++) { if (j > 0) probabilities.append(" "); probabilities.append( ((Pair<Integer, Double>) probabilityList.get(j)).getFirst().toString() + "," + ((Pair<Integer, Double>) probabilityList.get(j)).getSecond().toString()); } System.out.println(docIds.get(i) + "," + probabilities.toString()); } } catch (Exception e) { e.printStackTrace(); System.err.println(e.getMessage()); } }
public static void main(String[] args) { // String malletFile = "dataset/vlc_lectures.all.en.f8.mallet"; // String simFile = "dataset/vlc/sim5p.csv"; // String solutionFile = "dataset/vlc/task1_solution.en.f8.lm.txt"; // String queryFile = "dataset/task1_query.en.f8.txt"; // String targetFile = "dataset/task1_target.en.f8.txt"; String malletFile = "dataset/vlc/folds/all.0.4189.mallet"; String trainMalletFile = "dataset/vlc/folds/training.0.mallet"; String testMalletFile = "dataset/vlc/folds/test.0.mallet"; String queryFile = "dataset/vlc/folds/query.0.csv"; String linkFile = "dataset/vlc/folds/trainingPairs.0.csv"; String targetFile = "dataset/vlc/folds/target.0.csv"; String solutionFile = "dataset/vlc/task1_solution.en.f8.lm.txt"; int numTopics = 160; int numIterations = 200; double alpha = 0.0016; double beta = 0.0001; InstanceList train = InstanceList.load(new File(trainMalletFile)); InstanceList test = InstanceList.load(new File(testMalletFile)); SeparateParallelLda spl = new SeparateParallelLda(train, test); spl.trainDocuments(numTopics, numIterations, alpha, beta); spl.generateTestInference(); spl.lda.printTopWords(System.out, 10, true); BasicTask1Solution solver = new Task1SolutionWithSeparateData(spl); double precision; try { solver.retrieveTask1Solution(queryFile, solutionFile); precision = Task1Solution.evaluateResult(targetFile, solutionFile); System.out.println( String.format( "SeparateParallelLda: iteration: %d, precisoion: %f", numIterations, precision)); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } }