예제 #1
1
파일: IOUtils.java 프로젝트: cmoose/segan
  /**
   * Writes the top-ranked words of every topic, together with their values, to a file.
   *
   * <p>One line is written per topic: {@code "Topic <t + 1>"} followed, for each reported word, by
   * a tab and {@code "<word>, <value>, <running sum of values>"}. Words are reported in the order
   * produced by {@code Collections.sort} on their {@code RankingItem}s.
   *
   * @param topicWordDistr one row of per-word values for each topic
   * @param vocab tokens, looked up by the word index of each row entry
   * @param numTopWord maximum number of words to report per topic; fewer are written when the
   *     vocabulary or the topic's row is smaller
   * @param filepath path to the output file
   * @throws Exception if the file cannot be opened or written
   */
  public static void outputTopWordsWithProbs(
      double[][] topicWordDistr, ArrayList<String> vocab, int numTopWord, String filepath)
      throws Exception {
    BufferedWriter writer = IOUtils.getBufferedWriter(filepath);
    try {
      for (int t = 0; t < topicWordDistr.length; t++) {
        // sort words
        double[] bs = topicWordDistr[t];
        ArrayList<RankingItem<Integer>> rankedWords =
            new ArrayList<RankingItem<Integer>>(bs.length);
        for (int i = 0; i < bs.length; i++) {
          rankedWords.add(new RankingItem<Integer>(i, bs[i]));
        }
        Collections.sort(rankedWords);

        // Clamp to the ranked list as well: a row may hold fewer entries than the vocabulary,
        // and indexing past it would throw IndexOutOfBoundsException.
        int numOutput = Math.min(Math.min(numTopWord, vocab.size()), rankedWords.size());

        // output top words
        writer.write("Topic " + (t + 1));
        double cumm_prob = 0;
        for (int i = 0; i < numOutput; i++) {
          RankingItem<Integer> item = rankedWords.get(i);
          cumm_prob += item.getPrimaryValue();
          writer.write(
              "\t"
                  + vocab.get(item.getObject())
                  + ", "
                  + item.getPrimaryValue()
                  + ", "
                  + cumm_prob);
        }
        writer.write("\n");
      }
    } finally {
      // Release the file handle even when a write or lookup above fails.
      writer.close();
    }
  }
예제 #2
0
파일: IOUtils.java 프로젝트: cmoose/segan
  /**
   * Writes labeled sparse feature vectors to a file, one instance per line.
   *
   * <p>Each line holds the instance's labels joined by commas, a space, and then one
   * {@code " <index>:<value>"} entry per feature in the order returned by
   * {@code getSortedIndices()}. Instances without labels are skipped entirely, and features whose
   * absolute value is below {@code 10E-6} are omitted.
   *
   * @param outputFile file to write to
   * @param features feature vector of each instance
   * @param labels labels of each instance, indexed in parallel with {@code features}
   * @throws RuntimeException if opening or writing the file fails; the original exception is
   *     attached as the cause
   */
  public static void outputLibSVM(File outputFile, SparseVector[] features, int[][] labels) {
    System.out.println("Outputing LIBSVM-formatted data to " + outputFile);
    try {
      BufferedWriter writer = IOUtils.getBufferedWriter(outputFile);
      try {
        for (int ii = 0; ii < features.length; ii++) {
          int[] instanceLabels = labels[ii];
          if (instanceLabels.length == 0) {
            continue;
          }
          // labels
          int lastLabel = instanceLabels.length - 1;
          for (int jj = 0; jj < lastLabel; jj++) {
            writer.write(instanceLabels[jj] + ",");
          }
          writer.write(Integer.toString(instanceLabels[lastLabel]) + " ");

          // features
          for (int idx : features[ii].getSortedIndices()) {
            double featureVal = features[ii].get(idx);
            // Skip (near-)zero entries; note that 10E-6 is 1e-5.
            if (Math.abs(featureVal) < 10E-6) {
              continue;
            }
            writer.write(" " + idx + ":" + featureVal);
          }
          writer.write("\n");
        }
      } finally {
        // Release the file handle even when a write above fails.
        writer.close();
      }
    } catch (Exception e) {
      // Chain the cause so the failure's origin is not lost to callers.
      throw new RuntimeException(
          "Exception while outputing " + "LIBSVM-formatted data to " + outputFile, e);
    }
  }
예제 #3
0
파일: IOUtils.java 프로젝트: cmoose/segan
 /**
  * Writes the values of a distribution to a file.
  *
  * <p>Values are written in array order, each followed by a single space; no line terminator is
  * written.
  *
  * @param distr values to write
  * @param filepath path to the output file
  * @throws Exception if the file cannot be opened or written
  */
 public static void outputDistribution(double[] distr, String filepath) throws Exception {
   BufferedWriter writer = IOUtils.getBufferedWriter(filepath);
   try {
     for (double d : distr) {
       writer.write(d + " ");
     }
   } finally {
     // Release the file handle even when a write above fails.
     writer.close();
   }
 }
예제 #4
0
파일: IOUtils.java 프로젝트: cmoose/segan
  /**
   * Output top words for each topic with indices.
   *
   * <p>One line is written per topic: {@code "Topic <index>"} followed by the tab-separated top
   * words, in the order produced by {@code Collections.sort} on their {@code RankingItem}s.
   *
   * @param topicIndices List of topic indices, read in parallel with {@code topicWordDistr}
   * @param topicWordDistr 2D array containing topical word distributions
   * @param vocab List of tokens in the vocabulary
   * @param numTopWord Number of top words to output; fewer are written when the vocabulary or the
   *     topic's row is smaller
   * @param filepath Path to the output file
   * @throws Exception if the file cannot be opened or written
   */
  public static void outputTopWords(
      ArrayList<Integer> topicIndices,
      double[][] topicWordDistr,
      ArrayList<String> vocab,
      int numTopWord,
      String filepath)
      throws Exception {

    BufferedWriter writer = IOUtils.getBufferedWriter(filepath);
    try {
      for (int t = 0; t < topicWordDistr.length; t++) {
        // sort words
        double[] bs = topicWordDistr[t];
        ArrayList<RankingItem<Integer>> rankedWords =
            new ArrayList<RankingItem<Integer>>(bs.length);
        for (int i = 0; i < bs.length; i++) {
          rankedWords.add(new RankingItem<Integer>(i, bs[i]));
        }
        Collections.sort(rankedWords);

        // Clamp to the ranked list as well: a row may hold fewer entries than the vocabulary,
        // and indexing past it would throw IndexOutOfBoundsException.
        int numOutput = Math.min(Math.min(numTopWord, vocab.size()), rankedWords.size());

        // output top words
        writer.write("Topic " + topicIndices.get(t));
        for (int i = 0; i < numOutput; i++) {
          writer.write("\t" + vocab.get(rankedWords.get(i).getObject()));
        }
        writer.write("\n");
      }
    } finally {
      // Release the file handle even when a write or lookup above fails.
      writer.close();
    }
  }
예제 #5
0
파일: IOUtils.java 프로젝트: cmoose/segan
 /**
  * Writes a sequence of log-likelihood values to a file, one per line.
  *
  * <p>Each line has the form {@code "<position>\t<value>"}, where the position is the value's
  * zero-based index in the list.
  *
  * @param logLhoods values to write
  * @param filepath path to the output file
  * @throws Exception if the file cannot be opened or written
  */
 public static void outputLogLikelihoods(ArrayList<Double> logLhoods, String filepath)
     throws Exception {
   BufferedWriter writer = IOUtils.getBufferedWriter(filepath);
   try {
     for (int i = 0; i < logLhoods.size(); i++) {
       writer.write(i + "\t" + logLhoods.get(i) + "\n");
     }
   } finally {
     // Release the file handle even when a write above fails.
     writer.close();
   }
 }
예제 #6
0
파일: IOUtils.java 프로젝트: cmoose/segan
 /**
  * Writes a single perplexity value, followed by a newline, to a file.
  *
  * @param outputFile path to the output file
  * @param perplexity value to write
  * @throws RuntimeException if opening or writing the file fails; the original exception is
  *     attached as the cause
  */
 public static void outputPerplexity(String outputFile, double perplexity) {
   System.out.println("Outputing perplexity to " + outputFile);
   try {
     BufferedWriter writer = IOUtils.getBufferedWriter(outputFile);
     try {
       writer.write(perplexity + "\n");
     } finally {
       // Release the file handle even when the write above fails.
       writer.close();
     }
   } catch (Exception e) {
     // Chain the cause so the failure's origin is not lost to callers.
     throw new RuntimeException("Exception while outputing " + outputFile, e);
   }
 }
예제 #7
0
파일: IOUtils.java 프로젝트: cmoose/segan
 /**
  * Writes a 2D array of values to a file, one row per line.
  *
  * <p>Values within a row are separated by single spaces; an empty row produces an empty line.
  *
  * @param vars rows of values to write
  * @param filepath path to the output file
  * @throws Exception if the file cannot be opened or written
  */
 public static void outputLatentVariables(double[][] vars, String filepath) throws Exception {
   BufferedWriter writer = IOUtils.getBufferedWriter(filepath);
   try {
     for (double[] row : vars) {
       StringBuilder line = new StringBuilder();
       for (double v : row) {
         line.append(v).append(" ");
       }
       // trim() drops the separator left after the last value.
       writer.write(line.toString().trim() + "\n");
     }
   } finally {
     // Release the file handle even when a write above fails.
     writer.close();
   }
 }
예제 #8
0
파일: IOUtils.java 프로젝트: cmoose/segan
 /**
  * Output latent variable assignments.
  *
  * <p>One line is written per row of {@code var}. A non-empty row is written as
  * {@code "<row length>\t<v1> <v2> ..."}; an empty row is written as an empty line.
  *
  * @param var rows of assignments to write
  * @param filepath path to the output file
  * @throws Exception if the file cannot be opened or written
  */
 public static void outputLatentVariableAssignment(int[][] var, String filepath) throws Exception {
   BufferedWriter writer = IOUtils.getBufferedWriter(filepath);
   try {
     for (int[] assignments : var) {
       if (assignments.length == 0) {
         writer.write("\n");
         continue;
       }
       StringBuilder outputLine = new StringBuilder();
       outputLine.append(assignments.length).append("\t");
       for (int v : assignments) {
         outputLine.append(v).append(" ");
       }
       // trim() drops the separator left after the last value.
       writer.write(outputLine.toString().trim() + "\n");
     }
   } finally {
     // Release the file handle even when a write above fails.
     writer.close();
   }
 }
예제 #9
0
파일: IOUtils.java 프로젝트: cmoose/segan
 /**
  * Writes summary statistics and the individual values of topic coherences to a file.
  *
  * <p>The first four lines are {@code "Average-Coherence"}, {@code "Min-Coherence"},
  * {@code "Max-Coherence"} and {@code "Median-Coherence"}, each followed by a tab and the
  * corresponding {@code StatUtils} result. The individual coherence values follow, one per line,
  * in list order.
  *
  * @param outputFile file to write to
  * @param topicCoherences coherence values to summarize and list
  * @throws RuntimeException if computing the statistics or writing the file fails; the original
  *     exception is attached as the cause
  */
 public static void outputTopicCoherences(File outputFile, ArrayList<Double> topicCoherences) {
   try {
     BufferedWriter writer = IOUtils.getBufferedWriter(outputFile);
     try {
       writer.write("Average-Coherence\t" + StatUtils.mean(topicCoherences) + "\n");
       writer.write("Min-Coherence\t" + StatUtils.min(topicCoherences) + "\n");
       writer.write("Max-Coherence\t" + StatUtils.max(topicCoherences) + "\n");
       writer.write("Median-Coherence\t" + StatUtils.median(topicCoherences) + "\n");
       for (Double topicCoherence : topicCoherences) {
         writer.write(topicCoherence + "\n");
       }
     } finally {
       // Release the file handle even when a statistic or write above fails.
       writer.close();
     }
   } catch (Exception e) {
     // The message previously said "perplexity", which misreported what was being written.
     // The cause is chained so the failure's origin is not lost to callers.
     throw new RuntimeException(
         "Exception while outputing topic coherence" + " results to " + outputFile, e);
   }
 }