예제 #1
0
 /**
  * Joins two subtrees under a newly created parent node.
  *
  * <p>The parent's label is the {@code argMax()} of the counter that {@code spanToCategories}
  * holds for a span length equal to the combined yield sizes of both subtrees.
  *
  * @param leftTree subtree that becomes the first child
  * @param rightTree subtree that becomes the second child
  * @return a new tree whose children are {@code leftTree} followed by {@code rightTree}
  */
 private Tree<String> merge(Tree<String> leftTree, Tree<String> rightTree) {
   int leftWidth = leftTree.getYield().size();
   int rightWidth = rightTree.getYield().size();
   String parentLabel = spanToCategories.getCounter(leftWidth + rightWidth).argMax();

   List<Tree<String>> pair = new ArrayList<Tree<String>>(2);
   pair.add(leftTree);
   pair.add(rightTree);
   return new Tree<String>(parentLabel, pair);
 }
예제 #2
0
 /**
  * Resets this parser's statistics and rebuilds them from the given training trees.
  *
  * <p>A fresh {@code Lexicon} is built from all trees; then, per tree, the tree is counted once
  * in {@code knownParses} under its preterminal (tag) sequence and its spans are tallied into
  * {@code spanToCategories}.
  *
  * @param trainTrees the training trees to learn from
  */
 public void train(List<Tree<String>> trainTrees) {
   lexicon = new Lexicon(trainTrees);
   knownParses = new CounterMap<List<String>, Tree<String>>();
   spanToCategories = new CounterMap<Integer, String>();

   for (Tree<String> example : trainTrees) {
     // Index the whole tree by its tag sequence.
     knownParses.incrementCount(example.getPreTerminalYield(), example, 1.0);
     // Span tallying starts at position 0 of the sentence.
     tallySpans(example, 0);
   }
 }
예제 #3
0
 /**
  * Recursively counts the grammar rules and parent symbols used in a tree.
  *
  * <p>Leaves and preterminals contribute nothing. A node with one child adds one count to its
  * label and to its unary rule; a node with two children adds one count to its label and to its
  * binary rule. All children are then visited in order.
  *
  * @param tree the (sub)tree to tally
  * @param symbolCounter receives one count per unary or binary node, keyed by the node's label
  * @param unaryRuleCounter receives one count per unary node, keyed by its rule
  * @param binaryRuleCounter receives one count per binary node, keyed by its rule
  * @throws RuntimeException if a non-leaf, non-preterminal node does not have exactly one or two
  *     children
  */
 private void tallyTree(
     Tree<String> tree,
     Counter<String> symbolCounter,
     Counter<UnaryRule> unaryRuleCounter,
     Counter<BinaryRule> binaryRuleCounter) {
   if (tree.isLeaf() || tree.isPreTerminal()) {
     return;
   }

   int arity = tree.getChildren().size();
   if (arity == 1) {
     UnaryRule rule = makeUnaryRule(tree);
     symbolCounter.incrementCount(tree.getLabel(), 1.0);
     unaryRuleCounter.incrementCount(rule, 1.0);
   } else if (arity == 2) {
     BinaryRule rule = makeBinaryRule(tree);
     symbolCounter.incrementCount(tree.getLabel(), 1.0);
     binaryRuleCounter.incrementCount(rule, 1.0);
   } else {
     // Only unary and binary local trees can be turned into grammar rules.
     throw new RuntimeException(
         "Attempted to construct a Grammar with an illegal tree: " + tree);
   }

   for (Tree<String> subtree : tree.getChildren()) {
     tallyTree(subtree, symbolCounter, unaryRuleCounter, binaryRuleCounter);
   }
 }
예제 #4
0
 /**
  * Builds a lexicon from the observed tags in a list of training trees.
  *
  * <p>For every tree, the word at each position of its yield is paired with the tag at the same
  * position of its preterminal yield, and the pair is passed to {@code tallyTagging}.
  *
  * @param trainTrees the training trees to read (word, tag) pairs from
  */
 public Lexicon(List<Tree<String>> trainTrees) {
   for (Tree<String> sentenceTree : trainTrees) {
     List<String> tokens = sentenceTree.getYield();
     List<String> tokenTags = sentenceTree.getPreTerminalYield();
     int index = 0;
     while (index < tokens.size()) {
       // Word and tag are aligned by position.
       tallyTagging(tokens.get(index), tokenTags.get(index));
       index++;
     }
   }
 }
예제 #5
0
 /**
  * Records, for every node that is neither a leaf nor a preterminal, how many words it covers.
  *
  * <p>Each such node adds one count to {@code spanToCategories} under (span width, node label),
  * except nodes labeled {@code "ROOT"}, which are not counted.
  *
  * @param tree the (sub)tree to tally
  * @param start offset of the subtree's first word; it is threaded through the recursion, but
  *     the returned width does not depend on it
  * @return the width of {@code tree}: 1 for a leaf or preterminal, otherwise the sum of the
  *     children's widths
  */
 private int tallySpans(Tree<String> tree, int start) {
   if (tree.isLeaf() || tree.isPreTerminal()) {
     return 1;
   }

   int width = 0;
   for (Tree<String> subtree : tree.getChildren()) {
     // Each child begins where the previous siblings ended.
     width += tallySpans(subtree, start + width);
   }

   String category = tree.getLabel();
   boolean isRoot = category.equals("ROOT");
   if (!isRoot) {
     spanToCategories.incrementCount(width, category, 1.0);
   }
   return width;
 }
예제 #6
0
 /**
  * Parses every sufficiently short test sentence, prints the guessed and gold trees, and reports
  * the labeled-constituent evaluation.
  *
  * <p>Sentences longer than {@code MAX_LENGTH} words are skipped entirely.
  *
  * @param parser the parser under test
  * @param testTrees gold-standard trees; each tree's yield is the sentence to parse
  */
 private static void testParser(Parser parser, List<Tree<String>> testTrees) {
   // NOTE(review): the two sets passed to the evaluator are presumably labels and punctuation
   // tags it should ignore; confirm against LabeledConstituentEval.
   HashSet<String> punctuationTags =
       new HashSet<String>(Arrays.asList("''", "``", ".", ":", ","));
   EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String> scorer =
       new EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String>(
           Collections.singleton("ROOT"), punctuationTags);

   for (Tree<String> goldTree : testTrees) {
     List<String> words = goldTree.getYield();
     if (words.size() <= MAX_LENGTH) {
       Tree<String> guess = parser.getBestParse(words);
       System.out.println("Guess:\n" + Trees.PennTreeRenderer.render(guess));
       System.out.println("Gold:\n" + Trees.PennTreeRenderer.render(goldTree));
       scorer.evaluate(guess, goldTree);
     }
   }
   scorer.display(true);
 }
예제 #7
0
    /**
     * Builds the intermediate node that covers the children of {@code tree} from index {@code
     * numChildrenGenerated} onward.
     *
     * <p>The node's first child is the binarized child at that index. If exactly one more child
     * remains, it becomes the second child directly; if several remain, the second child is a
     * further intermediate node whose label is extended with {@code "_"} plus the current child's
     * label. If no child remains, the node has a single child.
     *
     * @param tree the local tree being decomposed
     * @param numChildrenGenerated index of the child this node starts with
     * @param intermediateLabel label to give the node built by this call
     * @return the intermediate node for the remaining children
     */
    private static Tree<String> binarizeTreeHelper(
        Tree<String> tree, int numChildrenGenerated, String intermediateLabel) {
      Tree<String> head = tree.getChildren().get(numChildrenGenerated);
      List<Tree<String>> pieces = new ArrayList<Tree<String>>();
      pieces.add(binarizeTree(head));

      // Number of children from the current one to the end, inclusive.
      int remaining = tree.getChildren().size() - numChildrenGenerated;
      if (remaining == 2) {
        // The last child needs no intermediate node of its own; pair it with this one.
        Tree<String> last = tree.getChildren().get(numChildrenGenerated + 1);
        pieces.add(binarizeTree(last));
      } else if (remaining > 2) {
        String nextLabel = intermediateLabel + "_" + head.getLabel();
        pieces.add(binarizeTreeHelper(tree, numChildrenGenerated + 1, nextLabel));
      }
      return new Tree<String>(intermediateLabel, pieces);
    }
예제 #8
0
    /**
     * Returns a copy of {@code tree} in which every node above the preterminal level has its
     * parent's original label appended to its own label.
     *
     * <p>Preterminal subtrees are returned as-is (the same object, not a copy). Children are
     * annotated with this node's unannotated label.
     *
     * @param tree the (sub)tree to annotate
     * @param parentLabel label of the parent node, or {@code null} to leave this node's label
     *     unchanged
     * @return the annotated tree, or {@code tree} itself when it is a preterminal
     */
    private static Tree<String> markovizeTree(Tree<String> tree, String parentLabel) {
      // Preterminals are handed back untouched.
      if (tree.isPreTerminal()) {
        return tree;
      }

      String ownLabel = tree.getLabel();
      List<Tree<String>> annotatedChildren = new ArrayList<Tree<String>>();
      for (Tree<String> subtree : tree.getChildren()) {
        annotatedChildren.add(markovizeTree(subtree, ownLabel));
      }

      // "-" is the separator because, per the original author's note, unannotating strips it
      // correctly while "^" was not removed.
      String annotatedLabel = (parentLabel == null) ? ownLabel : ownLabel + "-" + parentLabel;
      return new Tree<String>(annotatedLabel, annotatedChildren);
    }
예제 #9
0
    /**
     * Returns a copy of {@code tree} in which no node has more than two children.
     *
     * <p>Leaves, unary nodes and binary nodes keep their shape, with their children binarized
     * recursively. A node with three or more children is decomposed through {@code
     * binarizeTreeHelper}, using intermediate labels that start with {@code "@" + label + "->"}.
     *
     * @param tree the (sub)tree to binarize
     * @return the binarized copy
     */
    private static Tree<String> binarizeTree(Tree<String> tree) {
      final String label = tree.getLabel();
      if (tree.isLeaf()) {
        return new Tree<String>(label);
      }

      List<Tree<String>> kids = tree.getChildren();
      switch (kids.size()) {
        case 1:
          return new Tree<String>(label, Collections.singletonList(binarizeTree(kids.get(0))));
        case 2: {
          // Already binary: keep the node and only binarize below it.
          List<Tree<String>> both = new ArrayList<Tree<String>>();
          both.add(binarizeTree(kids.get(0)));
          both.add(binarizeTree(kids.get(1)));
          return new Tree<String>(label, both);
        }
        default: {
          // Ternary or wider: decompose into a chain of intermediate nodes, then hang the
          // chain's children directly under the original label.
          String intermediateLabel = "@" + label + "->";
          Tree<String> chain = binarizeTreeHelper(tree, 0, intermediateLabel);
          return new Tree<String>(label, chain.getChildren());
        }
      }
    }
예제 #10
0
 /**
  * Builds the binary rule "parent -> left right" from a node and its first two children.
  *
  * @param tree a node with at least two children
  * @return a rule made of the node's label and the labels of its first and second child
  */
 private BinaryRule makeBinaryRule(Tree<String> tree) {
   String parent = tree.getLabel();
   String leftChild = tree.getChildren().get(0).getLabel();
   String rightChild = tree.getChildren().get(1).getLabel();
   return new BinaryRule(parent, leftChild, rightChild);
 }
예제 #11
0
 /**
  * Returns a copy of the {@code argMax()} parse stored in {@code knownParses} for a tag sequence,
  * with its words replaced by the given sentence.
  *
  * @param tags the tag sequence to look up
  * @param sentence the words to set on the copied tree
  * @return a deep copy of the stored tree carrying {@code sentence} as its words
  */
 private Tree<String> getBestKnownParse(List<String> tags, List<String> sentence) {
   Tree<String> template = knownParses.getCounter(tags).argMax();
   // Copy first so the stored training tree is not modified.
   Tree<String> result = template.deepCopy();
   result.setWords(sentence);
   return result;
 }
예제 #12
0
    /**
     * Rebuilds the parse tree for the span {@code [begin, end)} rooted at {@code parent} by
     * following back pointers stored under string keys.
     *
     * <p>{@code backHash} is keyed by {@code begin + " " + end + " " + label}. In each stored
     * triplet the first element is the split point, with {@code -1} marking a unary step; the
     * second element is the label of the unary child or of the left child; the third element is
     * the label of the right child.
     *
     * <p>For a single-word span, unary steps are followed for as long as back pointers exist and
     * the word is then attached below the last node. For a longer span, unary steps are followed
     * until a binary back pointer is found, and both halves are rebuilt recursively.
     *
     * @param sentence the words of the sentence being parsed
     * @param backHash back pointers recorded while filling the chart
     * @param begin index of the first word of the span (inclusive)
     * @param end index one past the last word of the span
     * @param parent label of the node that roots the span
     * @return the reconstructed tree, rooted at a node labeled {@code parent}
     * @throws IllegalStateException if a span of more than one word has no back pointer for a
     *     label reached during reconstruction
     */
    public Tree<String> getParseTreeOld(
        List<String> sentence,
        HashMap<String, Triplet<Integer, String, String>> backHash,
        int begin,
        int end,
        String parent) {
      Triplet<Integer, String, String> triplet = backHash.get(begin + " " + end + " " + parent);
      Tree<String> topTree = new Tree<String>(parent);
      Tree<String> tree = topTree;

      if (begin == end - 1) {
        // Single word: a missing back pointer simply ends the unary chain.
        while (triplet != null && triplet.getFirst() == -1) {
          Tree<String> singleTree = new Tree<String>(triplet.getSecond());
          tree.setChildren(Collections.singletonList(singleTree));
          tree = singleTree;
          triplet = backHash.get(begin + " " + end + " " + triplet.getSecond());
        }
        tree.setChildren(Collections.singletonList(new Tree<String>(sentence.get(begin))));
        return topTree;
      }

      // Longer span: a binary back pointer must eventually be reached. Fail with the offending
      // key instead of dereferencing a null triplet.
      if (triplet == null) {
        throw new IllegalStateException(
            "No back pointer for span: " + begin + " " + end + " " + parent);
      }
      while (triplet.getFirst() == -1) {
        String childLabel = triplet.getSecond();
        Tree<String> singleTree = new Tree<String>(childLabel);
        tree.setChildren(Collections.singletonList(singleTree));
        tree = singleTree;
        triplet = backHash.get(begin + " " + end + " " + childLabel);
        if (triplet == null) {
          throw new IllegalStateException(
              "No back pointer for span: " + begin + " " + end + " " + childLabel);
        }
      }

      int split = triplet.getFirst();
      Tree<String> leftTree =
          getParseTreeOld(sentence, backHash, begin, split, triplet.getSecond());
      Tree<String> rightTree = getParseTreeOld(sentence, backHash, split, end, triplet.getThird());

      List<Tree<String>> children = new ArrayList<Tree<String>>();
      children.add(leftTree);
      children.add(rightTree);
      tree.setChildren(children);
      return topTree;
    }
예제 #13
0
    /**
     * Rebuilds the parse tree for the span {@code [begin, end)} rooted at {@code parent} by
     * following back pointers.
     *
     * <p>{@code backHash} is keyed by (begin, end, label). In each stored triplet the first
     * element is the split point, with {@code -1} marking a unary step; the second element is the
     * label of the unary child or of the left child; the third element is the label of the right
     * child.
     *
     * <p>For a single-word span, unary steps are followed for as long as back pointers exist and
     * the word is then attached below the last node. For a longer span, unary steps are followed
     * until a binary back pointer is found, and both halves are rebuilt recursively.
     *
     * @param sentence the words of the sentence being parsed
     * @param backHash back pointers recorded while filling the chart
     * @param begin index of the first word of the span (inclusive)
     * @param end index one past the last word of the span
     * @param parent label of the node that roots the span
     * @return the reconstructed tree, rooted at a node labeled {@code parent}
     * @throws IllegalStateException if a span of more than one word has no back pointer for a
     *     label reached during reconstruction
     */
    public Tree<String> getParseTree(
        List<String> sentence,
        HashMap<Triplet<Integer, Integer, String>, Triplet<Integer, String, String>> backHash,
        int begin,
        int end,
        String parent) {
      Triplet<Integer, String, String> triplet =
          backHash.get(new Triplet<Integer, Integer, String>(begin, end, parent));
      Tree<String> topTree = new Tree<String>(parent);
      Tree<String> tree = topTree;

      if (begin == end - 1) {
        // Single word: a missing back pointer simply ends the unary chain.
        while (triplet != null && triplet.getFirst() == -1) {
          Tree<String> singleTree = new Tree<String>(triplet.getSecond());
          tree.setChildren(Collections.singletonList(singleTree));
          tree = singleTree;
          triplet =
              backHash.get(new Triplet<Integer, Integer, String>(begin, end, triplet.getSecond()));
        }
        tree.setChildren(Collections.singletonList(new Tree<String>(sentence.get(begin))));
        return topTree;
      }

      // Longer span: a binary back pointer must eventually be reached. Fail with the offending
      // key instead of dereferencing a null triplet.
      if (triplet == null) {
        throw new IllegalStateException(
            "No back pointer for span: " + begin + " " + end + " " + parent);
      }
      while (triplet.getFirst() == -1) {
        String childLabel = triplet.getSecond();
        Tree<String> singleTree = new Tree<String>(childLabel);
        tree.setChildren(Collections.singletonList(singleTree));
        tree = singleTree;
        triplet = backHash.get(new Triplet<Integer, Integer, String>(begin, end, childLabel));
        if (triplet == null) {
          throw new IllegalStateException(
              "No back pointer for span: " + begin + " " + end + " " + childLabel);
        }
      }

      int split = triplet.getFirst();
      Tree<String> leftTree = getParseTree(sentence, backHash, begin, split, triplet.getSecond());
      Tree<String> rightTree = getParseTree(sentence, backHash, split, end, triplet.getThird());

      List<Tree<String>> children = new ArrayList<Tree<String>>();
      children.add(leftTree);
      children.add(rightTree);
      tree.setChildren(children);
      return topTree;
    }