private List<Summerizer[]> generateSummarizerList(List<Doc> docs,
        NGramProbs[] probs, LDAProbs inferredProbs) {
    List<Summerizer[]> summarizers = new ArrayList<Summerizer[]>();
    Summerizer[] s;

    // s = new Summerizer[docs.size()];
    // for (int i = 0; i < s.length; i++) {
    //     s[i] = new FirstSentSum(docs.get(i), DEFAULT_MAX_SUMMARY_LENGTH);
    // }
    // summarizers.add(s);

    s = new Summerizer[docs.size()];
    for (int i = 0; i < s.length; i++) {
        System.out.println("Generating FirstBaseline #" + (i + 1));
        s[i] = new FirstBaseline(docs.get(i), DEFAULT_MAX_SUMMARY_LENGTH);
    }
    summarizers.add(s);

    s = new Summerizer[docs.size()];
    for (int i = 0; i < s.length; i++) {
        System.out.println("Generating SecondBaseline #" + (i + 1));
        s[i] = new SecondBaseline(docs.get(i), DEFAULT_MAX_SUMMARY_LENGTH);
    }
    summarizers.add(s);

    // need to change constructors to include corpus TreeMap
    // s = new Summerizer[docs.size()];
    // for (int i = 0; i < s.length; i++) {
    //     System.out.println("Generating ArticleTopicNGramSum #" + (i + 1));
    //     s[i] = new ArticleTopicNGramSum(docs.get(i), DEFAULT_MAX_SUMMARY_LENGTH);
    // }
    // summarizers.add(s);

    // s = new Summerizer[docs.size()];
    // for (int i = 0; i < s.length; i++) {
    //     System.out.println("Generating NeFreqBasedSum #" + (i + 1));
    //     s[i] = new NeFreqBasedSum(docs.get(i), DEFAULT_MAX_SUMMARY_LENGTH);
    // }
    // summarizers.add(s);

    // s = new Summerizer[docs.size()];
    // for (int i = 0; i < s.length; i++) {
    //     System.out.println("Generating MostProbSentBasedOnTopicDocProb #" + (i + 1));
    //     s[i] = new MostProbSentBasedOnTopicDocProb(docs.get(i), DEFAULT_MAX_SUMMARY_LENGTH);
    // }
    // summarizers.add(s);

    CorpusCounts counts;
    try {
        counts = SerializableWrapper.readObject(CorpusCounts.SAVE_PATH);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    s = new Summerizer[docs.size()];
    for (int i = 0; i < s.length; i++) {
        System.out.println("Generating FeatureBased #" + (i + 1));
        Feature tf_idf = new Tf_IdfFeature(counts, docs.get(i));
        Feature lda = new LDAFeature(inferredProbs, docs.get(i));
        s[i] = new FeatureBasedSummary(docs.get(i), DEFAULT_MAX_SUMMARY_LENGTH,
                probs[i], tf_idf, lda);
    }
    summarizers.add(s);

    s = new Summerizer[docs.size()];
    for (int i = 0; i < s.length; i++) {
        System.out.println("Generating FeatureBased_Sent #" + (i + 1));
        Feature tf_idf = new Tf_IdfFeature(counts, docs.get(i));
        Feature lda = new LDAFeature(inferredProbs, docs.get(i));
        s[i] = new FeatureBasedSummary_Sent(docs.get(i), DEFAULT_MAX_SUMMARY_LENGTH,
                probs[i], tf_idf, lda);
    }
    summarizers.add(s);

    s = new Summerizer[docs.size()];
    for (int i = 0; i < s.length; i++) {
        System.out.println("Generating FeatureBased_BagOfWords #" + (i + 1));
        Feature tf_idf = new Tf_IdfFeature(counts, docs.get(i));
        Feature lda = new LDAFeature(inferredProbs, docs.get(i));
        s[i] = new FeatureBasedSummary_BagOfWords(docs.get(i), DEFAULT_MAX_SUMMARY_LENGTH,
                probs[i], tf_idf, lda);
    }
    summarizers.add(s);

    // s = new Summerizer[docs.size()];
    // for (int i = 0; i < s.length; i++) {
    //     System.out.println("Generating MostProbSentSimpleGreedy #" + (i + 1));
    //     s[i] = new MostProbSentSimpleGreedy(docs.get(i), DEFAULT_MAX_SUMMARY_LENGTH, probs[i]);
    // }
    // summarizers.add(s);

    return summarizers;
}
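// Editor's note: every active block in generateSummarizerList repeats the same
// "fill an array with one summarizer per document" pattern. The helper below is
// a hedged sketch of how that repetition could be collapsed; SummarizerFactory
// and buildSummarizers are hypothetical and are not part of this codebase.
private interface SummarizerFactory {
    Summerizer create(Doc doc);
}

private Summerizer[] buildSummarizers(List<Doc> docs, String label,
        SummarizerFactory factory) {
    Summerizer[] s = new Summerizer[docs.size()];
    for (int i = 0; i < s.length; i++) {
        // Mirror the progress logging used in generateSummarizerList above
        System.out.println("Generating " + label + " #" + (i + 1));
        s[i] = factory.create(docs.get(i));
    }
    return s;
}

// Hypothetical usage, equivalent to the FirstBaseline block above:
// summarizers.add(buildSummarizers(docs, "FirstBaseline", new SummarizerFactory() {
//     public Summerizer create(Doc doc) {
//         return new FirstBaseline(doc, DEFAULT_MAX_SUMMARY_LENGTH);
//     }
// }));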
/**
 * @param args
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    // Load config files
    System.err.println("Loading config files");
    Config conf = ConfigFactory.loadConfiguration(Config.class, Config.DEFAULT);
    IOConfig ioConf = ConfigFactory.loadConfiguration(IOConfig.class, IOConfig.DEFAULT);
    LDAEstimatorConfig estConf = ConfigFactory.loadConfiguration(
            LDAEstimatorConfig.class, LDAEstimatorConfig.DEFAULT);
    LDAInferenceConfig infConf = ConfigFactory.loadConfiguration(
            LDAInferenceConfig.class, LDAInferenceConfig.DEFAULT);

    main m = new main(conf, ioConf);

    // Load files that we want to summarize
    System.err.println("Loading documents");
    m.loadFiles();

    // for (CoreMap sentence : m.documents.get(1).getAno()
    //         .get(SentencesAnnotation.class)) {
    //     Tree tree = sentence.get(TreeAnnotation.class);
    //     tree.pennPrint();
    // }

    // for (Doc d : m.documents) {
    //     for (CoreLabel token : d.getAno().get(TokensAnnotation.class)) {
    //         System.out.println("Token (NE): "
    //                 + token.get(TextAnnotation.class) + " ("
    //                 + token.get(NamedEntityTagAnnotation.class) + ")");
    //     }
    // }

    // Load topic models
    System.err.println("Loading topic models");
    LDAProbs inferredModel = LDAProbsLoader.loadLDAProbs(estConf, infConf);

    // Loading clusters
    System.err.println("Loading doc clusters");
    DocCluster trainCluster = SerializableWrapper.readObject(DocCluster.CLUSTER_100_PATH);

    // Assign docs to clusters
    System.err.println("Assigning docs to clusters");
    List<Integer> clusterAssign = m.assignDocClusters(inferredModel);

    // Get a list of ngram probabilities for each document
    System.err.println("Getting doc ngram probabilities");
    NGramProbs[] probs = m.genDocNGramProbs(clusterAssign, trainCluster);

    System.err.println("Generating list of summarizers");
    List<Summerizer[]> summarizers = m.generateSummarizerList(m.documents,
            probs, inferredModel);
    // List<Summerizer[]> summarizers = m.generateSummarizerList(m.documents,
    //         null, inferredModel);

    Doc[][] summaries = new Doc[summarizers.size()][m.documents.size()];
    for (int i = 0; i < m.documents.size(); i++) {
        for (int j = 0; j < summarizers.size(); j++) {
            try {
                System.out.println("Generating summary (" + j + ", " + i + ")");
                m.generateSummary(m.documents.get(i), summarizers.get(j)[i]);
                // System.out.println(m.documents.get(i).summary);
                summaries[j][i] = new Doc();
                summaries[j][i].f = m.documents.get(i).f;
                summaries[j][i].summary = m.documents.get(i).summary;
            } catch (Exception e) {
                e.printStackTrace();
                summaries[j][i] = new Doc();
                summaries[j][i].f = m.documents.get(i).f;
                summaries[j][i].summary = "NO_SUM";
                continue;
            }
        }
        m.documents.get(i).setAno(null);
    }

    System.out.println("Start calculating ROUGE");
    int count = 0;

    // Generate the ROUGE evaluation file
    String rougeInFile = "ROUGE-IN.xml";
    RougeEvalBuilder reb = m.genRouge();
    reb.write(rougeInFile);

    FileWriter fw = new FileWriter(new File("summariesCollapsed"));
    for (Doc[] docSums : summaries) {
        fw.write(summarizers.get(count)[0].getClass() + "\n");
        for (Doc doc : docSums) {
            fw.write(doc.summary.replaceAll("\n", " ") + "\n");
        }

        // Write the summaries to disk
        m.writeSummaries(docSums, summarizers.get(count)[0].getClass());

        // Run the ROUGE script on the generated summaries and print the
        // results
        RougeScript rs = new RougeScript(conf.getRougePath(), 95, 500, 2, 1.2);
        System.out.println("Writing summaries to: results-"
                + summarizers.get(count)[0].getClass());
        rs.run(rougeInFile, "results-" + summarizers.get(count++)[0].getClass());
        // RougeResults results = rs.run(rougeInFile);
        // System.out.println(summarizers.get(count++)[0].getClass());
        // System.out.println(results.getNgramAvgF(1));
    }
    fw.close();

    // for (Summerizer[] s : summarizers) {
    //     System.err.println("Generating summaries for " + s.getClass());
    //     // Generate summaries
    //     for (int i = 0; i < s.length; i++) {
    //         m.generateSummary(m.documents.get(i), s[i]);
    //         System.out.println(m.documents.get(i).summary);
    //
    //         // Reset annotation to null so that we don't run out of memory
    //         m.documents.get(i).setAno(null);
    //     }
    //
    //     // Write the summaries to disk
    //     m.writeSummaries();
    //
    //     // Generate the ROUGE evaluation file
    //     String rougeInFile = "ROUGE-IN.xml";
    //     RougeEvalBuilder reb = m.genRouge();
    //     reb.write(rougeInFile);
    //
    //     // Run the ROUGE script on the generated summaries and print the
    //     // results
    //     RougeScript rs = new RougeScript(conf.getRougePath(), 95, 500, 2, 1.2);
    //     RougeResults results = rs.run(rougeInFile);
    //     System.out.println(s[0].getClass());
    //     System.out.println(results.getNgramAvgF(1));
    // }
}
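// Editor's note: the ROUGE loop in main() above advances the summarizer index
// with a separate "count" variable that is incremented inside the rs.run() call.
// The commented sketch below is a behaviorally equivalent rewrite using an
// explicit index, shown only to clarify that logic; it is not part of the
// original code.
//
//     for (int j = 0; j < summaries.length; j++) {
//         Doc[] docSums = summaries[j];
//         Class<? extends Summerizer> sumClass = summarizers.get(j)[0].getClass();
//         fw.write(sumClass + "\n");
//         for (Doc doc : docSums) {
//             fw.write(doc.summary.replaceAll("\n", " ") + "\n");
//         }
//         // Write the summaries to disk, then score them with ROUGE
//         m.writeSummaries(docSums, sumClass);
//         RougeScript rs = new RougeScript(conf.getRougePath(), 95, 500, 2, 1.2);
//         System.out.println("Writing summaries to: results-" + sumClass);
//         rs.run(rougeInFile, "results-" + sumClass);
//     }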