public String toString() {

    StringBuilder out = new StringBuilder();
    Formatter formatter = new Formatter(out, Locale.US);

    for (int topic = 0; topic < numTopics; topic++) {

      formatter.format("Topic %d", topic);

      for (TopicScores scores : diagnostics) {
        formatter.format("\t%s=%.4f", scores.name, scores.scores[topic]);
      }
      formatter.format("\n");

      for (int position = 0; position < topicTopWords[topic].length; position++) {
        if (topicTopWords[topic][position] == null) {
          break;
        }

        formatter.format("  %s", topicTopWords[topic][position]);
        for (TopicScores scores : diagnostics) {
          if (scores.wordScoresDefined) {
            formatter.format("\t%s=%.4f", scores.name, scores.topicWordScores[topic][position]);
          }
        }
        out.append("\n");
      }
    }

    return out.toString();
  }
Пример #2
0
  public void doInference() {

    try {

      ParallelTopicModel model = ParallelTopicModel.read(new File(inferencerFile));
      TopicInferencer inferencer = model.getInferencer();

      // TopicInferencer inferencer =
      //    TopicInferencer.read(new File(inferencerFile));

      // InstanceList testing = readFile();
      readFile();
      InstanceList testing = generateInstanceList(); // readFile();

      for (int i = 0; i < testing.size(); i++) {

        StringBuilder probabilities = new StringBuilder();
        double[] testProbabilities = inferencer.getSampledDistribution(testing.get(i), 10, 1, 5);

        ArrayList probabilityList = new ArrayList();

        for (int j = 0; j < testProbabilities.length; j++) {
          probabilityList.add(new Pair<Integer, Double>(j, testProbabilities[j]));
        }

        Collections.sort(probabilityList, new CustomComparator());

        for (int j = 0; j < testProbabilities.length && j < topN; j++) {
          if (j > 0) probabilities.append(" ");
          probabilities.append(
              ((Pair<Integer, Double>) probabilityList.get(j)).getFirst().toString()
                  + ","
                  + ((Pair<Integer, Double>) probabilityList.get(j)).getSecond().toString());
        }

        System.out.println(docIds.get(i) + "," + probabilities.toString());
      }

    } catch (Exception e) {
      e.printStackTrace();
      System.err.println(e.getMessage());
    }
  }
  public String toXML() {

    int[] tokensPerTopic = model.tokensPerTopic;

    StringBuilder out = new StringBuilder();
    Formatter formatter = new Formatter(out, Locale.US);

    out.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
    out.append("<model>\n");

    for (int topic = 0; topic < numTopics; topic++) {

      int[][] matrix = topicCodocumentMatrices[topic];

      formatter.format("<topic id='%d'", topic);

      for (TopicScores scores : diagnostics) {
        formatter.format(" %s='%.4f'", scores.name, scores.scores[topic]);
      }
      out.append(">\n");

      TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

      // How many words should we report? Some topics may have fewer than
      //  the default number of words with non-zero weight.
      int limit = numTopWords;
      if (sortedWords.size() < numTopWords) {
        limit = sortedWords.size();
      }

      double cumulativeProbability = 0.0;

      Iterator<IDSorter> iterator = sortedWords.iterator();
      for (int position = 0; position < limit; position++) {
        IDSorter info = iterator.next();
        double probability = info.getWeight() / tokensPerTopic[topic];
        cumulativeProbability += probability;

        formatter.format(
            "<word rank='%d' count='%.0f' prob='%.5f' cumulative='%.5f' docs='%d'",
            position + 1,
            info.getWeight(),
            probability,
            cumulativeProbability,
            matrix[position][position]);

        for (TopicScores scores : diagnostics) {
          if (scores.wordScoresDefined) {
            formatter.format(" %s='%.4f'", scores.name, scores.topicWordScores[topic][position]);
          }
        }
        formatter.format(
            ">%s</word>\n",
            topicTopWords[topic][position].replaceAll("&", "&amp;").replaceAll("<", "&gt;"));
      }

      out.append("</topic>\n");
    }
    out.append("</model>\n");

    return out.toString();
  }