Пример #1
0
 public void printDocumentTopics(PrintWriter pw, double threshold, int max) {
   pw.println("#doc source topic proportion ...");
   int docLen;
   double topicDist[] = new double[topics.length];
   for (int di = 0; di < topics.length; di++) {
     pw.print(di);
     pw.print(' ');
     if (ilist.get(di).getSource() != null) {
       pw.print(ilist.get(di).getSource().toString());
     } else {
       pw.print("null-source");
     }
     pw.print(' ');
     docLen = topics[di].length;
     for (int ti = 0; ti < numTopics; ti++)
       topicDist[ti] = (((float) docTopicCounts[di][ti]) / docLen);
     if (max < 0) max = numTopics;
     for (int tp = 0; tp < max; tp++) {
       double maxvalue = 0;
       int maxindex = -1;
       for (int ti = 0; ti < numTopics; ti++)
         if (topicDist[ti] > maxvalue) {
           maxvalue = topicDist[ti];
           maxindex = ti;
         }
       if (maxindex == -1 || topicDist[maxindex] < threshold) break;
       pw.print(maxindex + " " + topicDist[maxindex] + " ");
       topicDist[maxindex] = 0;
     }
     pw.println(' ');
   }
 }
Пример #2
0
 public void printState(PrintWriter pw) {
   Alphabet a = ilist.getDataAlphabet();
   pw.println("#doc pos typeindex type topic");
   for (int di = 0; di < topics.length; di++) {
     FeatureSequence fs = (FeatureSequence) ilist.get(di).getData();
     for (int si = 0; si < topics[di].length; si++) {
       int type = fs.getIndexAtPosition(si);
       pw.print(di);
       pw.print(' ');
       pw.print(si);
       pw.print(' ');
       pw.print(type);
       pw.print(' ');
       pw.print(a.lookupObject(type));
       pw.print(' ');
       pw.print(topics[di][si]);
       pw.println();
     }
   }
 }
Пример #3
0
 public void printState(PrintWriter pw) {
   pw.println("#doc pos typeindex type bigrampossible? topic bigram");
   for (int di = 0; di < topics.length; di++) {
     FeatureSequenceWithBigrams fs = (FeatureSequenceWithBigrams) ilist.get(di).getData();
     for (int si = 0; si < topics[di].length; si++) {
       int type = fs.getIndexAtPosition(si);
       pw.print(di);
       pw.print(' ');
       pw.print(si);
       pw.print(' ');
       pw.print(type);
       pw.print(' ');
       pw.print(uniAlphabet.lookupObject(type));
       pw.print(' ');
       pw.print(fs.getBiIndexAtPosition(si) == -1 ? 0 : 1);
       pw.print(' ');
       pw.print(topics[di][si]);
       pw.print(' ');
       pw.print(grams[di][si]);
       pw.println();
     }
   }
 }
  public static void main(String[] args) throws Exception {
    InstanceList instances = InstanceList.load(new File(args[0]));
    int numTopics = Integer.parseInt(args[1]);
    ParallelTopicModel model = new ParallelTopicModel(numTopics, 5.0, 0.01);
    model.addInstances(instances);
    model.setNumIterations(1000);

    model.estimate();

    TopicModelDiagnostics diagnostics = new TopicModelDiagnostics(model, 20);

    if (args.length == 3) {
      PrintWriter out = new PrintWriter(args[2]);
      out.println(diagnostics.toXML());
      out.close();
    }
  }