public static void outputTopWordsWithProbs( double[][] topicWordDistr, ArrayList<String> vocab, int numTopWord, String filepath) throws Exception { BufferedWriter writer = IOUtils.getBufferedWriter(filepath); for (int t = 0; t < topicWordDistr.length; t++) { // sort words double[] bs = topicWordDistr[t]; ArrayList<RankingItem<Integer>> rankedWords = new ArrayList<RankingItem<Integer>>(); for (int i = 0; i < bs.length; i++) { rankedWords.add(new RankingItem<Integer>(i, bs[i])); } Collections.sort(rankedWords); // output top words writer.write("Topic " + (t + 1)); double cumm_prob = 0; for (int i = 0; i < Math.min(numTopWord, vocab.size()); i++) { cumm_prob += rankedWords.get(i).getPrimaryValue(); writer.write( "\t" + vocab.get(rankedWords.get(i).getObject()) + ", " + rankedWords.get(i).getPrimaryValue() + ", " + cumm_prob); } writer.write("\n"); } writer.close(); }
public static void outputLibSVM(File outputFile, SparseVector[] features, int[][] labels) { System.out.println("Outputing LIBSVM-formatted data to " + outputFile); try { BufferedWriter writer = IOUtils.getBufferedWriter(outputFile); for (int ii = 0; ii < features.length; ii++) { if (labels[ii].length == 0) { continue; } // labels for (int jj = 0; jj < labels[ii].length - 1; jj++) { writer.write(labels[ii][jj] + ","); } writer.write(Integer.toString(labels[ii][labels[ii].length - 1]) + " "); // features for (int idx : features[ii].getSortedIndices()) { double featureVal = features[ii].get(idx); if (Math.abs(featureVal) < 10E-6) { continue; } writer.write(" " + idx + ":" + features[ii].get(idx)); } writer.write("\n"); } writer.close(); } catch (Exception e) { e.printStackTrace(); throw new RuntimeException( "Exception while outputing " + "LIBSVM-formatted data to " + outputFile); } }
/**
 * Writes the values of an array to a file on a single line, each value
 * followed by one space.
 *
 * @param distr values to write
 * @param filepath path of the output file
 * @throws Exception if the writer cannot be obtained or a write fails
 */
public static void outputDistribution(double[] distr, String filepath) throws Exception {
    BufferedWriter writer = IOUtils.getBufferedWriter(filepath);
    try {
        for (double d : distr) {
            writer.write(d + " ");
        }
    } finally {
        // Release the file handle even when a write fails part-way through.
        writer.close();
    }
}
/** * Output top words for each topic with indices * * @param topicIndices List of topic indices * @param topicWordDistr 2D array containing topical word distributions * @param vocab List of tokens in the vocabulary * @param numTopWord Number of top words to output * @param filepath Path to the output file */ public static void outputTopWords( ArrayList<Integer> topicIndices, double[][] topicWordDistr, ArrayList<String> vocab, int numTopWord, String filepath) throws Exception { BufferedWriter writer = IOUtils.getBufferedWriter(filepath); for (int t = 0; t < topicWordDistr.length; t++) { // sort words double[] bs = topicWordDistr[t]; ArrayList<RankingItem<Integer>> rankedWords = new ArrayList<RankingItem<Integer>>(); for (int i = 0; i < bs.length; i++) { rankedWords.add(new RankingItem<Integer>(i, bs[i])); } Collections.sort(rankedWords); // output top words writer.write("Topic " + topicIndices.get(t)); for (int i = 0; i < Math.min(numTopWord, vocab.size()); i++) { writer.write("\t" + vocab.get(rankedWords.get(i).getObject())); } writer.write("\n"); } writer.close(); }
/**
 * Writes a list of log-likelihood values to a file, one per line, as
 * {@code "<position>\t<value>"} where the position is the zero-based index
 * of the value in the list.
 *
 * @param logLhoods values to write
 * @param filepath path of the output file
 * @throws Exception if the writer cannot be obtained or a write fails
 */
public static void outputLogLikelihoods(ArrayList<Double> logLhoods, String filepath)
        throws Exception {
    BufferedWriter writer = IOUtils.getBufferedWriter(filepath);
    try {
        for (int i = 0; i < logLhoods.size(); i++) {
            writer.write(i + "\t" + logLhoods.get(i) + "\n");
        }
    } finally {
        // Release the file handle even when a write fails part-way through.
        writer.close();
    }
}
/**
 * Writes a single perplexity value, followed by a newline, to a file. A
 * progress message is printed to standard output before writing.
 *
 * @param outputFile path of the output file
 * @param perplexity value to write
 * @throws RuntimeException wrapping any exception raised while writing
 */
public static void outputPerplexity(String outputFile, double perplexity) {
    System.out.println("Outputing perplexity to " + outputFile);
    try {
        BufferedWriter writer = IOUtils.getBufferedWriter(outputFile);
        try {
            writer.write(perplexity + "\n");
        } finally {
            // Release the file handle even when the write fails.
            writer.close();
        }
    } catch (Exception e) {
        // Chain the cause so the original failure and its stack trace are preserved.
        throw new RuntimeException("Exception while outputing " + outputFile, e);
    }
}
/**
 * Writes a 2D array of values to a file, one row per line, with the values
 * of a row separated by single spaces. An empty row produces an empty line.
 *
 * @param vars rows of values to write
 * @param filepath path of the output file
 * @throws Exception if the writer cannot be obtained or a write fails
 */
public static void outputLatentVariables(double[][] vars, String filepath) throws Exception {
    BufferedWriter writer = IOUtils.getBufferedWriter(filepath);
    try {
        for (double[] var : vars) {
            StringBuilder line = new StringBuilder();
            for (double v : var) {
                line.append(Double.toString(v)).append(" ");
            }
            // trim() drops the trailing separator left by the loop above.
            writer.write(line.toString().trim() + "\n");
        }
    } finally {
        // Release the file handle even when a write fails part-way through.
        writer.close();
    }
}
/**
 * Output latent variable assignments.
 *
 * <p>One line is written per row. A non-empty row is written as
 * {@code "<row length>\t<v1> <v2> ..."}; an empty row is written as an empty
 * line.
 *
 * @param var rows of assignments to write
 * @param filepath path of the output file
 * @throws Exception if the writer cannot be obtained or a write fails
 */
public static void outputLatentVariableAssignment(int[][] var, String filepath)
        throws Exception {
    BufferedWriter writer = IOUtils.getBufferedWriter(filepath);
    try {
        for (int[] var_line : var) {
            if (var_line.length == 0) {
                writer.write("\n");
                continue;
            }
            StringBuilder outputLine = new StringBuilder();
            outputLine.append(Integer.toString(var_line.length)).append("\t");
            for (int v : var_line) {
                outputLine.append(Integer.toString(v)).append(" ");
            }
            // trim() drops the trailing separator left by the loop above.
            writer.write(outputLine.toString().trim() + "\n");
        }
    } finally {
        // Release the file handle even when a write fails part-way through.
        writer.close();
    }
}
/**
 * Writes topic coherence scores to a file.
 *
 * <p>The file starts with four tab-separated summary lines (average, min,
 * max and median, as computed by {@code StatUtils}), followed by every
 * individual coherence value on its own line.
 *
 * @param outputFile file to write to
 * @param topicCoherences coherence value of each topic
 * @throws RuntimeException wrapping any exception raised while computing the
 *     summary values or writing
 */
public static void outputTopicCoherences(File outputFile, ArrayList<Double> topicCoherences) {
    try {
        BufferedWriter writer = IOUtils.getBufferedWriter(outputFile);
        try {
            writer.write("Average-Coherence\t" + StatUtils.mean(topicCoherences) + "\n");
            writer.write("Min-Coherence\t" + StatUtils.min(topicCoherences) + "\n");
            writer.write("Max-Coherence\t" + StatUtils.max(topicCoherences) + "\n");
            writer.write("Median-Coherence\t" + StatUtils.median(topicCoherences) + "\n");
            for (Double topicCoherence : topicCoherences) {
                writer.write(topicCoherence + "\n");
            }
        } finally {
            // Release the file handle even when a write fails part-way through.
            writer.close();
        }
    } catch (Exception e) {
        // The message previously said "perplexity", which misreported what failed.
        // The cause is chained so the original failure is preserved.
        throw new RuntimeException(
                "Exception while outputing topic coherence" + " results to " + outputFile, e);
    }
}