Java TreebankLangParserParams Examples

Programming Language: Java

Namespace/Package Name: edu.stanford.nlp.parser.lexparser

Examples at hotexamples.com: 4

Java TreebankLangParserParams - 4 examples found. These are the top-rated real-world Java examples of edu.stanford.nlp.parser.lexparser.TreebankLangParserParams extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

diskTreebank(4)

pw(3)

collinizer(2)

setInputEncoding(2)

headFinder(1)

setOptionFlag(1)

setOutputEncoding(1)

transformTree(1)

treebankLanguagePack(1)

Example #1

Show file

File: ManipulateTopBracket.java Project: alishir/CoreNLP

  /**
   * Adds or removes the top (start-symbol) bracket of every tree in the given treebank files and
   * prints each resulting tree on its own line.
   *
   * <p>Options are parsed with {@code StringUtils.argsToProperties}:
   *
   * <ul>
   *   <li>{@code l} - language whose parser parameters are used (default {@code English});
   *   <li>{@code e} - input and output encoding (default {@code UTF-8});
   *   <li>{@code b} - when true, strip the start-symbol bracket instead of adding it.
   * </ul>
   *
   * <p>The whitespace-separated treebank paths are read from the property stored under the empty
   * key. If none are given, the usage message is logged and the JVM exits with status -1.
   *
   * @param args command-line options followed by one or more treebank paths
   */
  public static void main(String[] args) {
    if (args.length < minArgs) {
      System.out.println(usage());
      System.exit(-1);
    }

    Properties options = StringUtils.argsToProperties(args, argDefs());
    Language language = PropertiesUtils.get(options, "l", Language.English, Language.class);
    TreebankLangParserParams tlpp = language.params;
    // The encoding has its own option key; "l" is the language selector read just above, so
    // reading the encoding from it would turn a language name into a charset name.
    // NOTE(review): confirm that argDefs()/usage() declare "e" as the encoding flag.
    String encoding = options.getProperty("e", "UTF-8");
    boolean removeBracket = PropertiesUtils.getBool(options, "b", false);

    tlpp.setInputEncoding(encoding);
    tlpp.setOutputEncoding(encoding);
    DiskTreebank tb = tlpp.diskTreebank();

    // "".split("\\s+") yields a one-element array holding "", so the array length cannot be used
    // to detect a missing file list; test the trimmed string instead.
    String fileList = options.getProperty("", "").trim();
    if (fileList.isEmpty()) {
      log.info(usage());
      System.exit(-1);
    }
    for (String filename : fileList.split("\\s+")) {
      tb.loadPath(filename);
    }

    PrintWriter pwo = tlpp.pw();
    String startSymbol = tlpp.treebankLanguagePack().startSymbol();
    TreeFactory tf = new LabeledScoredTreeFactory();
    int nTrees = 0;
    try {
      for (Tree t : tb) {
        boolean hasTopBracket = t.value().equals(startSymbol);
        if (removeBracket) {
          if (hasTopBracket) {
            t = t.firstChild();
          }
        } else if (!hasTopBracket) {
          // Add a bracket if it isn't already there
          t = tf.newTreeNode(startSymbol, Collections.singletonList(t));
        }
        pwo.println(t.toString());
        nTrees++;
      }
    } finally {
      // Close (and thereby flush) the writer even if reading a tree fails part-way through.
      pwo.close();
    }
    System.err.printf("Processed %d trees.%n", nTrees);
  }

Example #2

Show file

File: LeafAncestorEval.java Project: automenta/corenlp

  /**
   * Evaluates a guess treebank against a gold treebank with a {@code LeafAncestorEval} metric and
   * prints the result. Execute with no arguments for usage.
   *
   * <p>Trees are paired in iteration order. A pair is skipped (and counted as skipped) when the
   * gold yield is longer than {@code MAX_GOLD_YIELD} or when the two yields differ in length. Both
   * trees of every remaining pair are passed through the language's collinizer before scoring.
   *
   * @param args command-line arguments, checked by {@code validateCommandLine}
   */
  public static void main(String[] args) {

    if (!validateCommandLine(args)) {
      System.err.println(USAGE);
      System.exit(-1);
    }

    final TreebankLangParserParams tlpp = LANGUAGE.params;
    final PrintWriter pwOut = tlpp.pw();

    final Treebank guessTreebank = tlpp.diskTreebank();
    guessTreebank.loadPath(guessFile);
    pwOut.println("GUESS TREEBANK:");
    pwOut.println(guessTreebank.textualSummary());

    final Treebank goldTreebank = tlpp.diskTreebank();
    goldTreebank.loadPath(goldFile);
    pwOut.println("GOLD TREEBANK:");
    pwOut.println(goldTreebank.textualSummary());

    final LeafAncestorEval metric = new LeafAncestorEval("LeafAncestor");

    final TreeTransformer tc = tlpp.collinizer();

    // The evalb ref implementation assigns status for each tree pair as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    // In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
    final Iterator<Tree> goldItr = goldTreebank.iterator();
    final Iterator<Tree> guessItr = guessTreebank.iterator();
    int goldLineId = 0;
    int guessLineId = 0;
    int skippedGuessTrees = 0;
    while (guessItr.hasNext() && goldItr.hasNext()) {
      Tree guessTree = guessItr.next();
      List<? extends Label> guessYield = guessTree.yield();
      guessLineId++;

      Tree goldTree = goldItr.next();
      List<? extends Label> goldYield = goldTree.yield();
      goldLineId++;

      // Check that we should evaluate this tree
      if (goldYield.size() > MAX_GOLD_YIELD) {
        skippedGuessTrees++;
        continue;
      }

      // Only trees with equal yields can be evaluated
      if (goldYield.size() != guessYield.size()) {
        pwOut.printf(
            "Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n",
            goldYield.size(), guessYield.size(), goldLineId, guessLineId);
        skippedGuessTrees++;
        continue;
      }

      final Tree evalGuess = tc.transformTree(guessTree);
      final Tree evalGold = tc.transformTree(goldTree);

      metric.evaluate(evalGuess, evalGold, ((VERBOSE) ? pwOut : null));
    }

    if (guessItr.hasNext() || goldItr.hasNext()) {
      // Both counters advance together inside the pairing loop, so they are equal here. Count the
      // unpaired remainder so that the warning reports the real number of trees on each side.
      while (guessItr.hasNext()) {
        guessItr.next();
        guessLineId++;
      }
      while (goldItr.hasNext()) {
        goldItr.next();
        goldLineId++;
      }
      // The full stop belongs before the line terminator, not at the start of the next line.
      System.err.printf(
          "Guess/gold files do not have equal lengths (guess: %d gold: %d).%n",
          guessLineId, goldLineId);
    }

    pwOut.println(
        "================================================================================");
    if (skippedGuessTrees != 0) {
      pwOut.printf("%s %d guess trees%n", "Unable to evaluate", skippedGuessTrees);
    }
    metric.display(true, pwOut);
    pwOut.close();
  }

Example #3

Show file

File: UnlabeledAttachmentEval.java Project: automenta/corenlp

  /**
   * Evaluates a guess treebank against a gold treebank with an {@code UnlabeledAttachmentEval}
   * metric and prints the result. The default language is English.
   *
   * <p>Recognised options: {@code -l} language, {@code -y} maximum gold yield length to evaluate,
   * {@code -v} verbose output, {@code -e} input encoding. Any other option prints the usage
   * message and exits with status -1. The first two non-option arguments are the gold file and the
   * guess file, in that order.
   *
   * <p>Trees are paired in iteration order. A pair is skipped (and counted as skipped) when the
   * gold yield is longer than the {@code -y} limit or when the two yields differ in length.
   *
   * @param args command-line options followed by the gold and guess treebank paths
   */
  public static void main(String[] args) {
    TreebankLangParserParams tlpp = new EnglishTreebankParserParams();
    int maxGoldYield = Integer.MAX_VALUE;
    boolean verbose = false;
    String encoding = "UTF-8";

    Map<String, String[]> argsMap = StringUtils.argsToMap(args, optionArgDefs);

    for (Map.Entry<String, String[]> opt : argsMap.entrySet()) {
      final String key = opt.getKey();
      if (key == null) {
        // Non-option arguments; handled once, after all options have been read.
        continue;
      }
      if (key.equals("-l")) {
        Language lang = Language.valueOf(opt.getValue()[0].trim());
        tlpp = lang.params;

      } else if (key.equals("-y")) {
        maxGoldYield = Integer.parseInt(opt.getValue()[0].trim());

      } else if (key.equals("-v")) {
        verbose = true;

      } else if (key.equals("-e")) {
        encoding = opt.getValue()[0];

      } else {
        System.err.println(usage.toString());
        System.exit(-1);
      }
    }

    // Non-option arguments located at key null. This must run outside the option loop: inside it,
    // an invocation without any option flag would never reach this code and both file names
    // would stay null.
    String[] rest = argsMap.get(null);
    if (rest == null || rest.length < minArgs || rest.length < 2) {
      System.err.println(usage.toString());
      System.exit(-1);
    }
    final String goldFile = rest[0];
    final String guessFile = rest[1];

    tlpp.setInputEncoding(encoding);
    final PrintWriter pwOut = tlpp.pw();

    final Treebank guessTreebank = tlpp.diskTreebank();
    guessTreebank.loadPath(guessFile);
    pwOut.println("GUESS TREEBANK:");
    pwOut.println(guessTreebank.textualSummary());

    final Treebank goldTreebank = tlpp.diskTreebank();
    goldTreebank.loadPath(goldFile);
    pwOut.println("GOLD TREEBANK:");
    pwOut.println(goldTreebank.textualSummary());

    final UnlabeledAttachmentEval metric =
        new UnlabeledAttachmentEval("UAS LP/LR", true, tlpp.headFinder());

    final TreeTransformer tc = tlpp.collinizer();

    // The evalb ref implementation assigns status for each tree pair as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    // In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
    final Iterator<Tree> goldItr = goldTreebank.iterator();
    final Iterator<Tree> guessItr = guessTreebank.iterator();
    int goldLineId = 0;
    int guessLineId = 0;
    int skippedGuessTrees = 0;
    while (guessItr.hasNext() && goldItr.hasNext()) {
      Tree guessTree = guessItr.next();
      List<? extends Label> guessYield = guessTree.yield();
      guessLineId++;

      Tree goldTree = goldItr.next();
      List<? extends Label> goldYield = goldTree.yield();
      goldLineId++;

      // Check that we should evaluate this tree
      if (goldYield.size() > maxGoldYield) {
        skippedGuessTrees++;
        continue;
      }

      // Only trees with equal yields can be evaluated
      if (goldYield.size() != guessYield.size()) {
        pwOut.printf(
            "Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n",
            goldYield.size(), guessYield.size(), goldLineId, guessLineId);
        skippedGuessTrees++;
        continue;
      }

      final Tree evalGuess = tc.transformTree(guessTree);
      evalGuess.indexLeaves(true);
      final Tree evalGold = tc.transformTree(goldTree);
      evalGold.indexLeaves(true);

      metric.evaluate(evalGuess, evalGold, (verbose ? pwOut : null));
    }

    if (guessItr.hasNext() || goldItr.hasNext()) {
      // Both counters advance together inside the pairing loop, so they are equal here. Count the
      // unpaired remainder so that the warning reports the real number of trees on each side.
      while (guessItr.hasNext()) {
        guessItr.next();
        guessLineId++;
      }
      while (goldItr.hasNext()) {
        goldItr.next();
        goldLineId++;
      }
      // The full stop belongs before the line terminator, not at the start of the next line.
      System.err.printf(
          "Guess/gold files do not have equal lengths (guess: %d gold: %d).%n",
          guessLineId, goldLineId);
    }

    pwOut.println(
        "================================================================================");
    if (skippedGuessTrees != 0) {
      pwOut.printf("%s %d guess trees\n", "Unable to evaluate", skippedGuessTrees);
    }
    metric.display(true, pwOut);

    pwOut.println();
    pwOut.close();
  }

Example #4

Show file

File: TreebankFactoredLexiconStats.java Project: toliwa/CoreNLP

  /**
   * Prints lexicon statistics (word, tag, lemma and morphological-tag counts) for a treebank.
   *
   * <p>Arabic gets the {@code -arabicFactored} option flag and an Arabic morphological feature
   * specification; every other language gets {@code -frenchFactored} and the French
   * specification. After the summary counts, the method lists each word with a single lemma whose
   * lemma occurs with more than one reduced tag, then the words with no or multiple lemmas, and
   * finally the reduced tags seen with each POS tag.
   *
   * @param args exactly three values: the {@code Language} constant name, the treebank path, and a
   *     comma-separated list of {@code MorphoFeatureType} names to activate
   */
  public static void main(String[] args) {
    if (args.length != 3) {
      System.err.printf(
          "Usage: java %s language filename features%n",
          TreebankFactoredLexiconStats.class.getName());
      System.exit(-1);
    }

    Language language = Language.valueOf(args[0]);
    TreebankLangParserParams tlpp = language.params;
    if (language.equals(Language.Arabic)) {
      String[] options = {"-arabicFactored"};
      tlpp.setOptionFlag(options, 0);
    } else {
      String[] options = {"-frenchFactored"};
      tlpp.setOptionFlag(options, 0);
    }
    Treebank tb = tlpp.diskTreebank();
    tb.loadPath(args[1]);

    MorphoFeatureSpecification morphoSpec =
        language.equals(Language.Arabic)
            ? new ArabicMorphoFeatureSpecification()
            : new FrenchMorphoFeatureSpecification();

    String[] features = args[2].trim().split(",");
    for (String feature : features) {
      morphoSpec.activate(MorphoFeatureType.valueOf(feature));
    }

    // Counters
    Counter<String> wordTagCounter = new ClassicCounter<>(30000);
    Counter<String> morphTagCounter = new ClassicCounter<>(500);
    Counter<String> morphCounter = new ClassicCounter<>(500);
    Counter<String> wordCounter = new ClassicCounter<>(30000);
    Counter<String> tagCounter = new ClassicCounter<>(300);

    Counter<String> lemmaCounter = new ClassicCounter<>(25000);
    Counter<String> lemmaTagCounter = new ClassicCounter<>(25000);

    Counter<String> richTagCounter = new ClassicCounter<>(1000);

    Counter<String> reducedTagCounter = new ClassicCounter<>(500);

    Counter<String> reducedTagLemmaCounter = new ClassicCounter<>(500);

    // Every lemma observed for each surface word.
    Map<String, Set<String>> wordLemmaMap = Generics.newHashMap();

    TwoDimensionalIntCounter<String, String> lemmaReducedTagCounter =
        new TwoDimensionalIntCounter<>(30000);
    // Keyed by lemma + reducedTag (see the increment below), counting the POS tags seen with it.
    TwoDimensionalIntCounter<String, String> reducedTagTagCounter =
        new TwoDimensionalIntCounter<>(500);
    TwoDimensionalIntCounter<String, String> tagReducedTagCounter =
        new TwoDimensionalIntCounter<>(300);

    int numTrees = 0;
    for (Tree tree : tb) {
      for (Tree subTree : tree) {
        if (!subTree.isLeaf()) {
          tlpp.transformTree(subTree, tree);
        }
      }
      List<Label> pretermList = tree.preTerminalYield();
      List<Label> yield = tree.yield();
      assert yield.size() == pretermList.size();

      int yieldLen = yield.size();
      for (int i = 0; i < yieldLen; ++i) {
        String tag = pretermList.get(i).value();

        String word = yield.get(i).value();
        String morph = ((CoreLabel) yield.get(i)).originalText();

        // Note: if there is no lemma, then we use the surface form.
        Pair<String, String> lemmaTag = MorphoFeatureSpecification.splitMorphString(word, morph);
        String lemma = lemmaTag.first();
        String richTag = lemmaTag.second();

        // WSGDEBUG
        if (tag.contains("MW")) {
          lemma += "-MWE";
        }

        lemmaCounter.incrementCount(lemma);
        lemmaTagCounter.incrementCount(lemma + tag);

        richTagCounter.incrementCount(richTag);

        String reducedTag = morphoSpec.strToFeatures(richTag).toString();
        reducedTagCounter.incrementCount(reducedTag);

        reducedTagLemmaCounter.incrementCount(reducedTag + lemma);

        wordTagCounter.incrementCount(word + tag);
        morphTagCounter.incrementCount(morph + tag);
        morphCounter.incrementCount(morph);
        wordCounter.incrementCount(word);
        tagCounter.incrementCount(tag);

        // The two-dimensional counters below use "NONE" in place of an empty reduced tag.
        reducedTag = reducedTag.equals("") ? "NONE" : reducedTag;

        // Record the lemma on the first occurrence of the word as well. Storing an empty set
        // there would drop that lemma and make words seen once look as if they had no lemma.
        Set<String> lemmasForWord = wordLemmaMap.get(word);
        if (lemmasForWord == null) {
          lemmasForWord = Generics.newHashSet(1);
          wordLemmaMap.put(word, lemmasForWord);
        }
        lemmasForWord.add(lemma);

        lemmaReducedTagCounter.incrementCount(lemma, reducedTag);
        reducedTagTagCounter.incrementCount(lemma + reducedTag, tag);
        tagReducedTagCounter.incrementCount(tag, reducedTag);
      }
      ++numTrees;
    }

    // Summary statistics
    System.out.println("Language: " + language.toString());
    System.out.printf("#trees:\t%d%n", numTrees);
    System.out.printf("#tokens:\t%d%n", (int) wordCounter.totalCount());
    System.out.printf("#words:\t%d%n", wordCounter.keySet().size());
    System.out.printf("#tags:\t%d%n", tagCounter.keySet().size());
    System.out.printf("#wordTagPairs:\t%d%n", wordTagCounter.keySet().size());
    System.out.printf("#lemmas:\t%d%n", lemmaCounter.keySet().size());
    System.out.printf("#lemmaTagPairs:\t%d%n", lemmaTagCounter.keySet().size());
    System.out.printf("#feattags:\t%d%n", reducedTagCounter.keySet().size());
    System.out.printf("#feattag+lemmas:\t%d%n", reducedTagLemmaCounter.keySet().size());
    System.out.printf("#richtags:\t%d%n", richTagCounter.keySet().size());
    System.out.printf("#richtag+lemma:\t%d%n", morphCounter.keySet().size());
    System.out.printf("#richtag+lemmaTagPairs:\t%d%n", morphTagCounter.keySet().size());

    // Extra
    System.out.println("==================");
    StringBuilder sbNoLemma = new StringBuilder();
    StringBuilder sbMultLemmas = new StringBuilder();
    for (Map.Entry<String, Set<String>> wordLemmas : wordLemmaMap.entrySet()) {
      String word = wordLemmas.getKey();
      Set<String> lemmas = wordLemmas.getValue();
      if (lemmas.isEmpty()) {
        sbNoLemma.append("NO LEMMAS FOR WORD: ").append(word).append("\n");
        continue;
      }
      if (lemmas.size() > 1) {
        sbMultLemmas
            .append("MULTIPLE LEMMAS: ")
            .append(word)
            .append(" ")
            .append(setToString(lemmas))
            .append("\n");
        continue;
      }
      // Exactly one lemma: report it only if that lemma occurs with several reduced tags.
      String lemma = lemmas.iterator().next();
      Set<String> reducedTags = lemmaReducedTagCounter.getCounter(lemma).keySet();
      if (reducedTags.size() > 1) {
        System.out.printf("%s --> %s%n", word, lemma);
        for (String reducedTag : reducedTags) {
          int count = lemmaReducedTagCounter.getCount(lemma, reducedTag);
          String posTags =
              setToString(reducedTagTagCounter.getCounter(lemma + reducedTag).keySet());
          System.out.printf("\t%s\t%d\t%s%n", reducedTag, count, posTags);
        }
        System.out.println();
      }
    }
    System.out.println("==================");
    System.out.println(sbNoLemma.toString());
    System.out.println(sbMultLemmas.toString());
    System.out.println("==================");
    List<String> tags = new ArrayList<>(tagReducedTagCounter.firstKeySet());
    Collections.sort(tags);
    for (String tag : tags) {
      System.out.println(tag);
      Set<String> reducedTags = tagReducedTagCounter.getCounter(tag).keySet();
      for (String reducedTag : reducedTags) {
        int count = tagReducedTagCounter.getCount(tag, reducedTag);
        System.out.printf("\t%s\t%d%n", reducedTag, count);
      }
      System.out.println();
    }
    System.out.println("==================");
  }