コード例 #1
0
  public static void main(String[] args) {
    if (args.length < minArgs) {
      System.out.println(usage());
      System.exit(-1);
    }

    Properties options = StringUtils.argsToProperties(args, argDefs());
    Language language = PropertiesUtils.get(options, "l", Language.English, Language.class);
    TreebankLangParserParams tlpp = language.params;
    DiskTreebank tb = null;
    String encoding = options.getProperty("l", "UTF-8");
    boolean removeBracket = PropertiesUtils.getBool(options, "b", false);

    tlpp.setInputEncoding(encoding);
    tlpp.setOutputEncoding(encoding);
    tb = tlpp.diskTreebank();

    String[] files = options.getProperty("", "").split("\\s+");
    if (files.length != 0) {
      for (String filename : files) {
        tb.loadPath(filename);
      }
    } else {
      log.info(usage());
      System.exit(-1);
    }

    PrintWriter pwo = tlpp.pw();
    String startSymbol = tlpp.treebankLanguagePack().startSymbol();
    TreeFactory tf = new LabeledScoredTreeFactory();
    int nTrees = 0;
    for (Tree t : tb) {
      if (removeBracket) {
        if (t.value().equals(startSymbol)) {
          t = t.firstChild();
        }

      } else if (!t.value().equals(startSymbol)) { // Add a bracket if it isn't already there
        t = tf.newTreeNode(startSymbol, Collections.singletonList(t));
      }
      pwo.println(t.toString());
      nTrees++;
    }
    pwo.close();
    System.err.printf("Processed %d trees.%n", nTrees);
  }
コード例 #2
0
  /**
   * Run the Evalb scoring metric on guess/gold input. The default language is English.
   *
   * @param args
   */
  public static void main(String[] args) {
    TreebankLangParserParams tlpp = new EnglishTreebankParserParams();
    int maxGoldYield = Integer.MAX_VALUE;
    boolean VERBOSE = false;
    String encoding = "UTF-8";

    String guessFile = null;
    String goldFile = null;

    Map<String, String[]> argsMap = StringUtils.argsToMap(args, optionArgDefs);

    for (Map.Entry<String, String[]> opt : argsMap.entrySet()) {
      if (opt.getKey() == null) continue;
      if (opt.getKey().equals("-l")) {
        Language lang = Language.valueOf(opt.getValue()[0].trim());
        tlpp = lang.params;

      } else if (opt.getKey().equals("-y")) {
        maxGoldYield = Integer.parseInt(opt.getValue()[0].trim());

      } else if (opt.getKey().equals("-v")) {
        VERBOSE = true;

      } else if (opt.getKey().equals("-e")) {
        encoding = opt.getValue()[0];

      } else {
        System.err.println(usage.toString());
        System.exit(-1);
      }

      // Non-option arguments located at key null
      String[] rest = argsMap.get(null);
      if (rest == null || rest.length < minArgs) {
        System.err.println(usage.toString());
        System.exit(-1);
      }
      goldFile = rest[0];
      guessFile = rest[1];
    }

    tlpp.setInputEncoding(encoding);
    final PrintWriter pwOut = tlpp.pw();

    final Treebank guessTreebank = tlpp.diskTreebank();
    guessTreebank.loadPath(guessFile);
    pwOut.println("GUESS TREEBANK:");
    pwOut.println(guessTreebank.textualSummary());

    final Treebank goldTreebank = tlpp.diskTreebank();
    goldTreebank.loadPath(goldFile);
    pwOut.println("GOLD TREEBANK:");
    pwOut.println(goldTreebank.textualSummary());

    final UnlabeledAttachmentEval metric =
        new UnlabeledAttachmentEval("UAS LP/LR", true, tlpp.headFinder());

    final TreeTransformer tc = tlpp.collinizer();

    // The evalb ref implementation assigns status for each tree pair as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    // In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
    final Iterator<Tree> goldItr = goldTreebank.iterator();
    final Iterator<Tree> guessItr = guessTreebank.iterator();
    int goldLineId = 0;
    int guessLineId = 0;
    int skippedGuessTrees = 0;
    while (guessItr.hasNext() && goldItr.hasNext()) {
      Tree guessTree = guessItr.next();
      List<? extends Label> guessYield = guessTree.yield();
      guessLineId++;

      Tree goldTree = goldItr.next();
      List<? extends Label> goldYield = goldTree.yield();
      goldLineId++;

      // Check that we should evaluate this tree
      if (goldYield.size() > maxGoldYield) {
        skippedGuessTrees++;
        continue;
      }

      // Only trees with equal yields can be evaluated
      if (goldYield.size() != guessYield.size()) {
        pwOut.printf(
            "Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n",
            goldYield.size(), guessYield.size(), goldLineId, guessLineId);
        skippedGuessTrees++;
        continue;
      }

      final Tree evalGuess = tc.transformTree(guessTree);
      evalGuess.indexLeaves(true);
      final Tree evalGold = tc.transformTree(goldTree);
      evalGold.indexLeaves(true);

      metric.evaluate(evalGuess, evalGold, ((VERBOSE) ? pwOut : null));
    }

    if (guessItr.hasNext() || goldItr.hasNext()) {
      System.err.printf(
          "Guess/gold files do not have equal lengths (guess: %d gold: %d)%n.",
          guessLineId, goldLineId);
    }

    pwOut.println(
        "================================================================================");
    if (skippedGuessTrees != 0)
      pwOut.printf("%s %d guess trees\n", "Unable to evaluate", skippedGuessTrees);
    metric.display(true, pwOut);

    pwOut.println();
    pwOut.close();
  }
コード例 #3
0
  /** Execute with no arguments for usage. */
  public static void main(String[] args) {

    if (!validateCommandLine(args)) {
      System.err.println(USAGE);
      System.exit(-1);
    }

    final TreebankLangParserParams tlpp = LANGUAGE.params;
    final PrintWriter pwOut = tlpp.pw();

    final Treebank guessTreebank = tlpp.diskTreebank();
    guessTreebank.loadPath(guessFile);
    pwOut.println("GUESS TREEBANK:");
    pwOut.println(guessTreebank.textualSummary());

    final Treebank goldTreebank = tlpp.diskTreebank();
    goldTreebank.loadPath(goldFile);
    pwOut.println("GOLD TREEBANK:");
    pwOut.println(goldTreebank.textualSummary());

    final LeafAncestorEval metric = new LeafAncestorEval("LeafAncestor");

    final TreeTransformer tc = tlpp.collinizer();

    // The evalb ref implementation assigns status for each tree pair as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    // In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
    final Iterator<Tree> goldItr = goldTreebank.iterator();
    final Iterator<Tree> guessItr = guessTreebank.iterator();
    int goldLineId = 0;
    int guessLineId = 0;
    int skippedGuessTrees = 0;
    while (guessItr.hasNext() && goldItr.hasNext()) {
      Tree guessTree = guessItr.next();
      List<? extends Label> guessYield = guessTree.yield();
      guessLineId++;

      Tree goldTree = goldItr.next();
      List<? extends Label> goldYield = goldTree.yield();
      goldLineId++;

      // Check that we should evaluate this tree
      if (goldYield.size() > MAX_GOLD_YIELD) {
        skippedGuessTrees++;
        continue;
      }

      // Only trees with equal yields can be evaluated
      if (goldYield.size() != guessYield.size()) {
        pwOut.printf(
            "Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n",
            goldYield.size(), guessYield.size(), goldLineId, guessLineId);
        skippedGuessTrees++;
        continue;
      }

      final Tree evalGuess = tc.transformTree(guessTree);
      final Tree evalGold = tc.transformTree(goldTree);

      metric.evaluate(evalGuess, evalGold, ((VERBOSE) ? pwOut : null));
    }

    if (guessItr.hasNext() || goldItr.hasNext()) {
      System.err.printf(
          "Guess/gold files do not have equal lengths (guess: %d gold: %d)%n.",
          guessLineId, goldLineId);
    }

    pwOut.println(
        "================================================================================");
    if (skippedGuessTrees != 0)
      pwOut.printf("%s %d guess trees%n", "Unable to evaluate", skippedGuessTrees);
    metric.display(true, pwOut);
    pwOut.close();
  }