/**
 * Writes one line per document listing its most prominent topics.
 *
 * <p>After a header line, each document line contains the document index, the
 * instance's source (or the literal {@code null-source} when the source is
 * {@code null}), and then a sequence of {@code topic proportion} pairs in
 * decreasing order of proportion. Proportions that tie are emitted in
 * increasing topic-index order, and topics whose proportion is zero are never
 * emitted.
 *
 * @param pw        writer that receives the report; it is neither flushed nor closed here
 * @param threshold listing for a document stops at the first topic whose
 *                  proportion is below this value
 * @param max       maximum number of topics to list per document; a negative
 *                  value means "no limit" (all {@code numTopics} topics)
 */
public void printDocumentTopics(PrintWriter pw, double threshold, int max) {
    pw.println("#doc source topic proportion ...");

    // Scratch buffer holding one proportion per TOPIC. It must be sized by the
    // number of topics, not the number of documents, because it is indexed by
    // topic below; sizing it by topics.length overflows whenever there are
    // fewer documents than topics.
    double[] topicDist = new double[numTopics];

    // Loop-invariant: a negative limit means every topic may be listed.
    if (max < 0) {
        max = numTopics;
    }

    for (int di = 0; di < topics.length; di++) {
        pw.print(di);
        pw.print(' ');

        Object source = ilist.get(di).getSource();
        if (source != null) {
            pw.print(source.toString());
        } else {
            pw.print("null-source");
        }
        pw.print(' ');

        // Convert this document's per-topic counts into proportions of its length.
        // The float cast is kept deliberately so the printed values stay
        // identical to what existing consumers of this report already see.
        // NOTE(review): for a zero-length document the division presumably
        // yields NaN (assuming integer counts), in which case no topic is
        // selected below and the line carries no pairs.
        int docLen = topics[di].length;
        for (int ti = 0; ti < numTopics; ti++) {
            topicDist[ti] = (((float) docTopicCounts[di][ti]) / docLen);
        }

        // Repeated selection of the largest remaining proportion; each chosen
        // entry is zeroed so the next pass finds the runner-up.
        for (int tp = 0; tp < max; tp++) {
            double maxvalue = 0;
            int maxindex = -1;
            for (int ti = 0; ti < numTopics; ti++) {
                if (topicDist[ti] > maxvalue) {
                    maxvalue = topicDist[ti];
                    maxindex = ti;
                }
            }
            // Stop when nothing positive is left or the best one is under the threshold.
            if (maxindex == -1 || topicDist[maxindex] < threshold) {
                break;
            }
            pw.print(maxindex + " " + topicDist[maxindex] + " ");
            topicDist[maxindex] = 0;
        }
        pw.println(' ');
    }
}
/**
 * Dumps the current topic assignment of every token, one token per line.
 *
 * <p>The first line is a header naming the columns. Each following line holds,
 * separated by single spaces: the document index, the position within the
 * document, the type index found at that position, the object the data
 * alphabet maps that index to, and the topic stored for that position.
 *
 * @param pw writer that receives the dump; it is neither flushed nor closed here
 */
public void printState(PrintWriter pw) {
    final char sep = ' ';
    Alphabet alphabet = ilist.getDataAlphabet();

    pw.println("#doc pos typeindex type topic");

    for (int doc = 0; doc < topics.length; doc++) {
        FeatureSequence tokens = (FeatureSequence) ilist.get(doc).getData();
        final int docLength = topics[doc].length;

        for (int pos = 0; pos < docLength; pos++) {
            int typeIndex = tokens.getIndexAtPosition(pos);

            pw.print(doc);
            pw.print(sep);
            pw.print(pos);
            pw.print(sep);
            pw.print(typeIndex);
            pw.print(sep);
            pw.print(alphabet.lookupObject(typeIndex));
            pw.print(sep);
            // Last column: print the value and terminate the line in one call.
            pw.println(topics[doc][pos]);
        }
    }
}
/**
 * Dumps the current per-token state, one token per line, including the
 * bigram-related columns.
 *
 * <p>The first line is a header naming the columns. Each following line holds,
 * separated by single spaces: the document index, the position within the
 * document, the type index at that position, the object {@code uniAlphabet}
 * maps that index to, a 0/1 flag that is 0 exactly when the sequence reports a
 * bigram index of -1 at that position, the stored topic, and the stored gram
 * value for that position.
 *
 * @param pw writer that receives the dump; it is neither flushed nor closed here
 */
public void printState(PrintWriter pw) {
    final char sep = ' ';

    pw.println("#doc pos typeindex type bigrampossible? topic bigram");

    for (int doc = 0; doc < topics.length; doc++) {
        FeatureSequenceWithBigrams tokens =
                (FeatureSequenceWithBigrams) ilist.get(doc).getData();
        final int docLength = topics[doc].length;

        for (int pos = 0; pos < docLength; pos++) {
            int typeIndex = tokens.getIndexAtPosition(pos);

            pw.print(doc);
            pw.print(sep);
            pw.print(pos);
            pw.print(sep);
            pw.print(typeIndex);
            pw.print(sep);
            pw.print(uniAlphabet.lookupObject(typeIndex));
            pw.print(sep);

            // A bigram index of -1 is reported as 0, anything else as 1.
            int bigramFlag;
            if (tokens.getBiIndexAtPosition(pos) == -1) {
                bigramFlag = 0;
            } else {
                bigramFlag = 1;
            }
            pw.print(bigramFlag);
            pw.print(sep);

            pw.print(topics[doc][pos]);
            pw.print(sep);
            // Last column: print the value and terminate the line in one call.
            pw.println(grams[doc][pos]);
        }
    }
}
/**
 * Command-line entry point: loads an instance list, trains a topic model on
 * it, computes diagnostics and optionally writes them as XML.
 *
 * <p>Arguments:
 * <ol>
 *   <li>path of the serialized instance list to load</li>
 *   <li>number of topics (an integer)</li>
 *   <li>optional: path of the file that receives the diagnostics XML; the XML
 *       is written only when exactly three arguments are given</li>
 * </ol>
 *
 * @param args command-line arguments as described above
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 * @throws java.io.IOException      if writing the diagnostics file fails
 * @throws Exception                anything raised while loading, training or reporting
 */
public static void main(String[] args) throws Exception {
    // Fail with a readable message instead of a bare array-index error.
    if (args.length < 2) {
        throw new IllegalArgumentException(
                "Usage: <instance-list file> <number of topics> [<output XML file>]");
    }

    InstanceList instances = InstanceList.load(new File(args[0]));
    int numTopics = Integer.parseInt(args[1]);

    // NOTE(review): 5.0 and 0.01 are presumably the model's smoothing
    // hyperparameters — confirm against the ParallelTopicModel constructor.
    ParallelTopicModel model = new ParallelTopicModel(numTopics, 5.0, 0.01);
    model.addInstances(instances);
    model.setNumIterations(1000);
    model.estimate();

    // NOTE(review): 20 is presumably the number of top words examined per
    // topic — confirm against the TopicModelDiagnostics constructor.
    TopicModelDiagnostics diagnostics = new TopicModelDiagnostics(model, 20);

    if (args.length == 3) {
        PrintWriter out = new PrintWriter(args[2]);
        try {
            out.println(diagnostics.toXML());
            // PrintWriter never throws on write failure; it only records it,
            // so ask explicitly to avoid silently leaving a truncated file.
            if (out.checkError()) {
                throw new java.io.IOException(
                        "Failed to write diagnostics to " + args[2]);
            }
        } finally {
            // Release the file handle even if building or writing the XML fails.
            out.close();
        }
    }
}