Exemplo n.º 1
0
  /**
   * Punishes chunks whose length is other than 3.
   *
   * <p>Each chunk's confidence is multiplied by {@code 1.0 - 0.2 * |size - 3|}: a chunk of size
   * three keeps its confidence, and the penalty grows linearly with the distance from three. The
   * multiplier is clamped at zero, because for sizes larger than eight the unclamped formula is
   * negative and would turn the confidence negative.
   *
   * @param ranker the ranker whose chunks have their confidence adjusted in place
   */
  private static void discriminate(ChunkRanker ranker) {
    ArrayList<LexChunk> chunks = ranker.getChunks();
    for (LexChunk ch : chunks) {
      int distance = Math.abs(ch.size() - 3);
      double weight = Math.max(0.0, 1.0 - 0.2 * distance);

      // Twiddle the confidence of the chunk.
      // NOTE(review): assumes every chunk carries a SimpleTruthValue; the cast
      // throws ClassCastException otherwise -- confirm against LexChunk.
      SimpleTruthValue stv = (SimpleTruthValue) ch.getTruthValue();
      stv.setConfidence(stv.getConfidence() * weight);
    }
  }
Exemplo n.º 2
0
  /**
   * Main entry point: command-line driver for the relation extractor.
   *
   * <p>Parses the switches listed in the usage string, configures a {@code RelationExtractor},
   * and then processes either the single sentence given with {@code -s} or text read line by line
   * from standard input (until end of input or a line consisting of {@code END.}). For every
   * sentence, the requested views of each parse are printed to standard output; when
   * {@code --html} is given, a per-sentence summary is also written to that file.
   *
   * <p>After normal processing the method ends the JVM with {@code System.exit(0)}. It returns
   * without exiting when {@code -h} is given or when an option value cannot be processed.
   *
   * @param args command-line arguments; run with {@code -h} to print the accepted switches
   */
  public static void main(String[] args) {
    String callString =
        "RelationExtractor"
            + " [-a (perform anaphora resolution)]"
            + " [--expand-preps (show expanded prepositions)]"
            + " [-h (show this help)]"
            + " [-i (show output for generation)]"
            + " [-l (show Link Grammar parse diagram)]"
            + " [--lang language (default en for English)]"
            + " [-m (show parse metadata)]"
            + " [--maxParseSeconds N]"
            + " [-n max number of parses to display]"
            + " [-o (show opencog scheme output)]"
            + " [--or (show opencog rule-based scheme output)]"
            + " [--pa (show phrase-based lexical chunks)]"
            + " [--pb (show pattern-based lexical chunks)]"
            + " [--pc (show relational lexical chunks)]"
            + " [--penn (generate Penn treebank-style POS tags)]"
            + " [--prolog (show prolog output)]"
            + " [-q (do NOT show relations)]"
            + " [-r (show raw output)]"
            + " [-s Sentence (in quotes)]"
            + " [--stanford (generate stanford-compatible output)]"
            + " [-t (show parse tree)]"
            + " [-v (verbose, full graph output)]"
            + " [--html filename (output HTML to file)]";

    // Switches that take no value.
    HashSet<String> flags = new HashSet<String>();
    flags.add("-a");
    flags.add("--expand-preps");
    flags.add("-h");
    flags.add("-i");
    flags.add("-l");
    flags.add("-m");
    flags.add("-o");
    flags.add("--or");
    flags.add("--pa");
    flags.add("--pb");
    flags.add("--pc");
    flags.add("--penn");
    flags.add("--prolog");
    flags.add("-q");
    flags.add("-r");
    flags.add("--stanford");
    flags.add("-t");
    flags.add("-v");

    // Switches that are followed by a value.
    HashSet<String> opts = new HashSet<String>();
    opts.add("-n");
    opts.add("-s");
    opts.add("--html");
    opts.add("--lang");
    opts.add("--maxParseSeconds");
    Map<String, String> commandMap = CommandLineArgParser.parse(args, opts, flags);

    // Things that can be set via command line flags; cache till needed.
    String sentence = null;
    String language = "en";
    int maxParses = 1;
    int maxParseSeconds = 6;
    PrintWriter html = null;

    // Check for optional command line arguments.
    try {
      String opt = commandMap.get("-s");
      if (opt != null) sentence = opt;

      opt = commandMap.get("-n");
      if (opt != null) maxParses = Integer.parseInt(opt);

      opt = commandMap.get("--html");
      if (opt != null) html = new PrintWriter(new FileWriter(opt));

      opt = commandMap.get("--lang");
      if (opt != null) language = opt;

      opt = commandMap.get("--maxParseSeconds");
      if (opt != null) maxParseSeconds = Integer.parseInt(opt);
    } catch (Exception e) {
      System.err.println("Unrecognized parameter.");
      System.err.println(callString);
      e.printStackTrace();
      // The HTML file may already have been opened before a later option failed.
      if (html != null) {
        html.close();
      }
      return;
    }

    if (commandMap.get("-h") != null) {
      System.err.println(callString);
      // Do not leak the HTML writer when only the help text was requested.
      if (html != null) {
        html.close();
      }
      return;
    }

    // If generating OpenCog Scheme, delimit output.
    if (commandMap.get("-o") != null) System.out.print("scm\n");

    if (html != null) html.println("<html>");

    RelationExtractor re = new RelationExtractor();
    // careful: set language *before* doing other things, to avoid call to init()
    re.setLanguage(language);
    re.setAllowSkippedWords(true);
    re.setMaxParses(maxParses);
    re.setMaxParseSeconds(maxParseSeconds);
    System.out.println("; Version: " + re.getVersion());

    // Don't run anaphora if -o is set, this will be done in a
    // distinct stage that wipes out the first run.
    if ((commandMap.get("-a") != null) && (commandMap.get("-o") == null)) {
      re.do_anaphora_resolution = true;
      re.do_tree_markup = true;
    }

    // The parse tree view and all three chunkers need tree markup.
    if ((commandMap.get("-t") != null)
        || (commandMap.get("--pa") != null)
        || (commandMap.get("--pb") != null)
        || (commandMap.get("--pc") != null)) {
      re.do_tree_markup = true;
    }

    if (commandMap.get("--stanford") != null) {
      re.do_stanford = true;
    }

    if (commandMap.get("--penn") != null) {
      re.do_penn_tagging = true;
    }

    if (commandMap.get("--expand-preps") != null) {
      re.do_expand_preps = true;
    }

    // If sentence is not passed at command line, read from standard input:
    BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
    DocSplitter ds = DocSplitterFactory.create();

    // QuotesParens is currently broken, it fails to handle possessives.
    // QuotesParensSentenceDetector ds = QuotesParensSentenceDetector.create();

    // Only created for -o; every later use is guarded by the same -o test.
    OpenCogScheme opencog = null;
    if (commandMap.get("-o") != null) {
      opencog = new OpenCogScheme();
      if (commandMap.get("-l") != null) {
        opencog.setShowLinkage(true);
      }
      opencog.setShowRelex(true);
      if (commandMap.get("-q") != null) {
        opencog.setShowRelex(false);
      }
      if (commandMap.get("-a") != null) {
        opencog.setShowAnaphora(true);
      }
    }

    boolean do_logic_output = false;
    LogicView logicView = new LogicView();
    if (commandMap.get("--or") != null) {
      do_logic_output = true;
      logicView.loadRules();
    }

    int sentence_count = 0;
    boolean more_input = true;
    while (more_input) {
      // If no sentence specified on the command line
      // (with the "-s" flag), then read it from stdin.
      while (sentence == null) {
        try {
          sentence = stdin.readLine();
          if ((sentence == null) || "END.".equals(sentence)) {
            more_input = false;
            sentence = null;
            break;
          }
        } catch (IOException e) {
          System.err.println("Error reading sentence from the standard input!");
          // Stop reading: falling through here would append the literal
          // text "null " to the splitter and then try the failed read again.
          more_input = false;
          sentence = null;
          break;
        }

        // Buffer up input text, and wait for a whole,
        // complete sentence before continuing.
        ds.addText(sentence + " ");
        sentence = ds.getNextSentence();
      }

      while (sentence != null) {
        System.out.println("; SENTENCE: [" + sentence + "]");
        Sentence sntc = re.processSentence(sentence);

        // Crazy error condition ... the parser is broken somehow ...
        if (null == sntc) {
          sentence = ds.getNextSentence();
          break;
        }

        re.doco.addSentence(sntc);

        // Open the per-sentence HTML container; it is closed after the parse loop.
        if (html != null)
          html.printf(
              "<div id='relex-%d'><table><tr><td>%d: %s</td></tr><tr>\n",
              sentence_count, sentence_count, escape(sentence));

        sentence_count++;
        re.stats.bin(sntc);

        int np = sntc.getParses().size();
        if (np > maxParses) np = maxParses;

        // chunk ranking stuff
        ChunkRanker ranker = new ChunkRanker();
        double parse_weight = 1.0 / ((double) np);
        // The tiny seed keeps the division below finite when no chunker is
        // selected. The pattern chunker (--pb) counts double, matching the
        // 2.0 * votes it is given further down.
        double votes = 1.0e-20;
        if (commandMap.get("--pa") != null) votes += 1.0;
        if (commandMap.get("--pb") != null) votes += 2.0;
        if (commandMap.get("--pc") != null) votes += 1.0;
        votes = 1.0 / votes;
        votes *= parse_weight;

        // Print output
        int numParses = 0;
        for (ParsedSentence parse : sntc.getParses()) {
          if (commandMap.get("-o") == null) {
            System.out.println(sentence);
            System.out.println("\n====\n");
            System.out.println("Parse " + (numParses + 1) + " of " + sntc.getParses().size());
          }

          if (commandMap.get("-i") != null) {
            System.out.println("\n=====\n");
            System.out.println(NLGInputView.printRelations(parse));
            System.out.println("\n=====\n");
          }

          if (commandMap.get("-r") != null) {
            System.out.println("\n====\n");
            System.out.println("Dependency graph:\n");
            System.out.println(RawView.printZHeads(parse.getLeft()));
            System.out.println("\n======\n");
          }

          if (commandMap.get("-t") != null) {
            System.out.println("\n" + parse.getPhraseString());
            if (html != null)
              html.printf("<td colspan='2'>%s</td></tr><tr>", escape(parse.getPhraseString()));
          }

          // Don't print the link string if xml output is enabled.
          // XML parsers choke on it.
          if ((commandMap.get("-l") != null) && (commandMap.get("-o") == null))
            System.out.println("\n" + parse.getLinkString());

          if (commandMap.get("-m") != null) {
            System.out.println(parse.getMetaData().toString() + "\n");
          }

          if (commandMap.get("-o") == null) {
            // Print simple parse ranking, truncated to at most six characters.
            double confidence = parse.getTruthValue().getConfidence();
            String prt_cnfd = Double.toString(confidence);
            prt_cnfd = prt_cnfd.substring(0, Math.min(6, prt_cnfd.length()));
            System.out.println("Parse confidence: " + prt_cnfd);
            System.out.println(
                "cost vector = (UNUSED="
                    + parse.getNumSkippedWords()
                    + " DIS="
                    + parse.getDisjunctCost()
                    + " LEN="
                    + parse.getLinkCost()
                    + ")");
          }

          // Verbose graph.
          if (commandMap.get("-v") != null)
            // System.out.println("\n" + parse.fullParseString());
            System.out.println("\n" + parse.getLeft().toString(LinkView.getFilter()));

          if ((commandMap.get("-q") == null)
              && (commandMap.get("-o") == null)
              && re.do_apply_algs) {
            System.out.println("\n======\n");
            System.out.println("Dependency relations:\n");
            System.out.println(SimpleView.printRelations(parse));
            System.out.println("\n======\n");

            if (html != null)
              html.printf(
                  "<td valign='top'><pre>%s</pre></td>\n",
                  escape(SimpleView.printRelations(parse)));
          }

          if (do_logic_output) {
            System.out.println("\n======\n");
            System.out.println("Relex2Logic output:");
            System.out.println(logicView.printRelationsNew(parse));
            System.out.println("\n======\n");
          }

          if (commandMap.get("--pa") != null) {
            System.out.println("Phrase tree-based lexical chunks:");
            LexicalChunker chunker = new PhraseChunker();
            chunker.findChunks(parse);
            prt_chunks(chunker.getChunks());
            ranker.add(chunker.getChunks(), parse.getTruthValue(), votes);
          }
          if (commandMap.get("--pb") != null) {
            System.out.println("Pattern-matching lexical chunks:");
            LexicalChunker chunker = new PatternChunker();
            chunker.findChunks(parse);
            prt_chunks(chunker.getChunks());
            ranker.add(chunker.getChunks(), parse.getTruthValue(), 2.0 * votes);
          }
          if (commandMap.get("--pc") != null) {
            System.out.println("Relation-based lexical chunks:");
            LexicalChunker chunker = new RelationChunker();
            chunker.findChunks(parse);
            prt_chunks(chunker.getChunks());
            ranker.add(chunker.getChunks(), parse.getTruthValue(), votes);
          }

          if (commandMap.get("--prolog") != null) {
            PrologList pl = new PrologList();
            System.out.println(
                pl.toPrologList(parse.getLeft(), PrologList.getDefaultFilter(), true));
            System.out.println("\n======\n");
          }

          if (commandMap.get("--stanford") != null) {
            System.out.println("Stanford-style dependency relations:\n");
            System.out.println(StanfordView.printRelations(parse, re.do_penn_tagging, "    "));
            System.out.println("\n======\n");
          }

          if (commandMap.get("-o") != null) {
            opencog.setParse(parse);
            System.out.println(opencog.toString());
          }

          if (++numParses >= maxParses) break;
        }

        // Close the per-sentence HTML container exactly once, however many
        // parses were printed (including none), so the markup stays balanced.
        if (html != null) html.println("</tr></table></div>");

        if (0 < ranker.getChunks().size()) {
          discriminate(ranker);
          System.out.println("\nLexical Chunks:\n" + ranker.toString());
        }

        if (re.do_anaphora_resolution && (commandMap.get("-o") == null)) {
          System.out.println("\nAntecedent candidates:\n" + re.antecedents.toString());
        }

        // Print out the stats every now and then.
        if (sentence_count % 5 == 0) {
          System.err.println("\n" + re.stats.toString());
        }

        // With -s there is exactly one sentence to process.
        if (commandMap.get("-s") != null) break;
        sentence = ds.getNextSentence();
      }
      if (commandMap.get("-s") != null) break;
    }

    if (html != null) {
      html.println("</html>");
      html.close();
    }

    // Dump the list of document sentences
    if (commandMap.get("-o") != null) {
      System.out.println(opencog.printDocument(re.doco));
    }
    System.out.println("; Bye.");
    if (commandMap.get("-o") != null) {
      System.out.println(".\nexit");
    }
    System.exit(0);
  }