public String toString() { StringBuilder out = new StringBuilder(); Formatter formatter = new Formatter(out, Locale.US); for (int topic = 0; topic < numTopics; topic++) { formatter.format("Topic %d", topic); for (TopicScores scores : diagnostics) { formatter.format("\t%s=%.4f", scores.name, scores.scores[topic]); } formatter.format("\n"); for (int position = 0; position < topicTopWords[topic].length; position++) { if (topicTopWords[topic][position] == null) { break; } formatter.format(" %s", topicTopWords[topic][position]); for (TopicScores scores : diagnostics) { if (scores.wordScoresDefined) { formatter.format("\t%s=%.4f", scores.name, scores.topicWordScores[topic][position]); } } out.append("\n"); } } return out.toString(); }
public void doInference() { try { ParallelTopicModel model = ParallelTopicModel.read(new File(inferencerFile)); TopicInferencer inferencer = model.getInferencer(); // TopicInferencer inferencer = // TopicInferencer.read(new File(inferencerFile)); // InstanceList testing = readFile(); readFile(); InstanceList testing = generateInstanceList(); // readFile(); for (int i = 0; i < testing.size(); i++) { StringBuilder probabilities = new StringBuilder(); double[] testProbabilities = inferencer.getSampledDistribution(testing.get(i), 10, 1, 5); ArrayList probabilityList = new ArrayList(); for (int j = 0; j < testProbabilities.length; j++) { probabilityList.add(new Pair<Integer, Double>(j, testProbabilities[j])); } Collections.sort(probabilityList, new CustomComparator()); for (int j = 0; j < testProbabilities.length && j < topN; j++) { if (j > 0) probabilities.append(" "); probabilities.append( ((Pair<Integer, Double>) probabilityList.get(j)).getFirst().toString() + "," + ((Pair<Integer, Double>) probabilityList.get(j)).getSecond().toString()); } System.out.println(docIds.get(i) + "," + probabilities.toString()); } } catch (Exception e) { e.printStackTrace(); System.err.println(e.getMessage()); } }
public String toXML() { int[] tokensPerTopic = model.tokensPerTopic; StringBuilder out = new StringBuilder(); Formatter formatter = new Formatter(out, Locale.US); out.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); out.append("<model>\n"); for (int topic = 0; topic < numTopics; topic++) { int[][] matrix = topicCodocumentMatrices[topic]; formatter.format("<topic id='%d'", topic); for (TopicScores scores : diagnostics) { formatter.format(" %s='%.4f'", scores.name, scores.scores[topic]); } out.append(">\n"); TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic); // How many words should we report? Some topics may have fewer than // the default number of words with non-zero weight. int limit = numTopWords; if (sortedWords.size() < numTopWords) { limit = sortedWords.size(); } double cumulativeProbability = 0.0; Iterator<IDSorter> iterator = sortedWords.iterator(); for (int position = 0; position < limit; position++) { IDSorter info = iterator.next(); double probability = info.getWeight() / tokensPerTopic[topic]; cumulativeProbability += probability; formatter.format( "<word rank='%d' count='%.0f' prob='%.5f' cumulative='%.5f' docs='%d'", position + 1, info.getWeight(), probability, cumulativeProbability, matrix[position][position]); for (TopicScores scores : diagnostics) { if (scores.wordScoresDefined) { formatter.format(" %s='%.4f'", scores.name, scores.topicWordScores[topic][position]); } } formatter.format( ">%s</word>\n", topicTopWords[topic][position].replaceAll("&", "&").replaceAll("<", ">")); } out.append("</topic>\n"); } out.append("</model>\n"); return out.toString(); }