Example #1
0
 /**
  * Builds the ROUGE evaluation configuration, pairing each system output in the
  * output directory with its model (reference) summaries from the model directory.
  *
  * @return a builder pre-populated with the doc-to-model evaluation pairs
  * @throws IOException if the model links cannot be read
  */
 private RougeEvalBuilder genRouge() throws IOException {
   RougeEvalBuilder builder = new RougeEvalBuilder(ioConf.getOutputDir(), ioConf.getModelDir());
   builder.addEvals(linkModels());
   return builder;
 }
Example #2
0
  /**
   * Entry point: loads the configuration, documents, topic models, and document
   * clusters; generates a summary of every document with every configured
   * summarizer; writes the summaries to disk; and runs ROUGE evaluation on each
   * summarizer's output.
   *
   * @param args command-line arguments (unused)
   * @throws IOException if configuration, document, or result files cannot be read or written
   */
  public static void main(String[] args) throws IOException {
    // Load config files
    System.err.println("Loading config files");
    Config conf = ConfigFactory.loadConfiguration(Config.class, Config.DEFAULT);
    IOConfig ioConf = ConfigFactory.loadConfiguration(IOConfig.class, IOConfig.DEFAULT);
    LDAEstimatorConfig estConf =
        ConfigFactory.loadConfiguration(LDAEstimatorConfig.class, LDAEstimatorConfig.DEFAULT);
    LDAInferenceConfig infConf =
        ConfigFactory.loadConfiguration(LDAInferenceConfig.class, LDAInferenceConfig.DEFAULT);

    main m = new main(conf, ioConf);

    // Load files that we want to summarize
    System.err.println("Loading documents");
    m.loadFiles();

    // Load topic models
    System.err.println("Loading topic models");
    LDAProbs inferredModel = LDAProbsLoader.loadLDAProbs(estConf, infConf);

    // Loading clusters
    System.err.println("Loading doc clusters");
    DocCluster trainCluster = SerializableWrapper.readObject(DocCluster.CLUSTER_100_PATH);

    // Assign docs to clusters
    System.err.println("Assigning docs to clusters");
    List<Integer> clusterAssign = m.assignDocClusters(inferredModel);

    // Get a list of ngram probabilities for each document
    System.err.println("Getting doc ngram probabilities");
    NGramProbs[] probs = m.genDocNGramProbs(clusterAssign, trainCluster);

    List<Summerizer[]> summarizers = m.generateSummarizerList(m.documents, probs, inferredModel);

    // summaries[j][i] holds the summary produced by summarizer j for document i
    Doc[][] summaries = new Doc[summarizers.size()][m.documents.size()];
    for (int i = 0; i < m.documents.size(); i++) {
      for (int j = 0; j < summarizers.size(); j++) {
        try {
          System.out.println("Generating summary (" + j + ", " + i + ")");

          m.generateSummary(m.documents.get(i), summarizers.get(j)[i]);

          summaries[j][i] = new Doc();
          summaries[j][i].f = m.documents.get(i).f;
          summaries[j][i].summary = m.documents.get(i).summary;
        } catch (Exception e) {
          // A failed summarizer must not abort the whole run; record a placeholder
          // so the summaries array stays aligned with the document list.
          e.printStackTrace();
          summaries[j][i] = new Doc();
          summaries[j][i].f = m.documents.get(i).f;
          summaries[j][i].summary = "NO_SUM";
        }
      }
      // Drop the annotation once every summarizer has seen this document,
      // so that we don't run out of memory.
      m.documents.get(i).setAno(null);
    }

    System.out.println("Start calculating ROUGE");

    // Generate the ROUGE evaluation file
    String rougeInFile = "ROUGE-IN.xml";
    RougeEvalBuilder reb = m.genRouge();
    reb.write(rougeInFile);

    // try-with-resources: the writer is closed even if ROUGE evaluation throws
    try (FileWriter fw = new FileWriter(new File("summariesCollapsed"))) {
      for (int count = 0; count < summaries.length; count++) {
        Doc[] docSums = summaries[count];

        // Collapse each summary onto one line, headed by the summarizer's class
        fw.write(summarizers.get(count)[0].getClass() + "\n");
        for (Doc doc : docSums) {
          fw.write(doc.summary.replaceAll("\n", " ") + "\n");
        }

        // Write the summaries to disk
        m.writeSummaries(docSums, summarizers.get(count)[0].getClass());

        // Run the ROUGE script on the generated summaries and print the
        // results
        RougeScript rs = new RougeScript(conf.getRougePath(), 95, 500, 2, 1.2);
        System.out.println("Writing summaries to: results-" + summarizers.get(count)[0].getClass());
        rs.run(rougeInFile, "results-" + summarizers.get(count)[0].getClass());
      }
    }
  }