Java TregexPattern 예제들, edu.stanford.nlp.trees.tregex.TregexPattern Java 예제들

예제 #1

0

파일 보기

파일: AnalysisUtilities.java 프로젝트: kutschkem/SmithHeilmann_fork

  /**
   * Remove traces and non-terminal decorations (e.g., "-SUBJ" in "NP-SUBJ") from a Penn
   * Treebank-style tree.
   *
   * @param inputTree
   */
  public void normalizeTree(Tree inputTree) {
    inputTree.label().setFromString("ROOT");

    List<Pair<TregexPattern, TsurgeonPattern>> ops =
        new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
    String tregexOpStr;
    TregexPattern matchPattern;
    TsurgeonPattern p;
    TregexMatcher matcher;

    tregexOpStr = "/\\-NONE\\-/=emptynode";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(inputTree);
    ps.add(Tsurgeon.parseOperation("prune emptynode"));
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    p = Tsurgeon.collectOperations(ps);
    ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
    Tsurgeon.processPatternsOnTree(ops, inputTree);

    Label nonterminalLabel;

    tregexOpStr = "/.+\\-.+/=nonterminal < __";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(inputTree);
    while (matcher.find()) {
      nonterminalLabel = matcher.getNode("nonterminal");
      if (nonterminalLabel == null) continue;
      nonterminalLabel.setFromString(tlp.basicCategory(nonterminalLabel.value()));
    }
  }

예제 #2

0

파일 보기

파일: Tsurgeon.java 프로젝트: nitish11/CoreNLP

 /**
  * Tries to match a pattern against a tree. If it succeeds, apply the surgical operations
  * contained in a {@link TsurgeonPattern}.
  *
  * @param matchPattern A {@link TregexPattern} to be matched against a {@link Tree}.
  * @param p A {@link TsurgeonPattern} to apply.
  * @param t the {@link Tree} to match against and perform surgery on.
  * @return t, which has been surgically modified.
  */
 public static Tree processPattern(TregexPattern matchPattern, TsurgeonPattern p, Tree t) {
   TregexMatcher m = matchPattern.matcher(t);
   while (m.find()) {
     t = p.evaluate(t, m);
     if (t == null) {
       break;
     }
     m = matchPattern.matcher(t);
   }
   return t;
 }

예제 #3

0

파일 보기

파일: TregexPattern.java 프로젝트: hans/CoreNLP

 private void prettyPrint(PrintWriter pw, int indent) {
   for (int i = 0; i < indent; i++) {
     pw.print("   ");
   }
   if (neg) {
     pw.print('!');
   }
   if (opt) {
     pw.print('?');
   }
   pw.println(localString());
   for (TregexPattern child : getChildren()) {
     child.prettyPrint(pw, indent + 1);
   }
 }

예제 #4

0

파일 보기

파일: ATBCorrector.java 프로젝트: automenta/corenlp

  private static List<Pair<TregexPattern, TsurgeonPattern>> loadOps() {
    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<>();

    String line = null;
    try {
      BufferedReader br = new BufferedReader(new StringReader(editStr));
      List<TsurgeonPattern> tsp = new ArrayList<>();
      while ((line = br.readLine()) != null) {
        if (DEBUG) System.err.print("Pattern is " + line);
        TregexPattern matchPattern = TregexPattern.compile(line);
        if (DEBUG) System.err.println(" [" + matchPattern + ']');
        tsp.clear();
        while (continuing(line = br.readLine())) {
          TsurgeonPattern p = Tsurgeon.parseOperation(line);
          if (DEBUG) System.err.println("Operation is " + line + " [" + p + ']');
          tsp.add(p);
        }
        if (!tsp.isEmpty()) {
          TsurgeonPattern tp = Tsurgeon.collectOperations(tsp);
          ops.add(new Pair<>(matchPattern, tp));
        }
      } // while not at end of file
    } catch (IOException ioe) {
      ioe.printStackTrace();
    }

    return ops;
  }

예제 #5

0

파일 보기

파일: PronounFilter.java 프로젝트: zhuzixiao/Dragon

  public static boolean filter(PhraseInfo phrase) {
    if (phrase == null || phrase.getTree() == null) return false;
    Tree phraseTree = phrase.getTree();

    // __ < ( NP < (PRP !< it|them) ) | < ( NP < ( NP < (PRP !< it|them) ) )
    String invalidPronounPattern =
        " ( NP < (PRP=prp !< " + Utils.wordsConjuction(Rules.VALID_PRONOUNS) + ") ) ";
    String filterPattern =
        "__ < " + invalidPronounPattern + " | < ( NP <" + invalidPronounPattern + ")";

    TregexPattern tregexPattern = TregexPattern.compile(filterPattern);
    TregexMatcher matcher = tregexPattern.matcher(phraseTree);

    if (matcher.matches()) {
      Proof proof = new Proof(ProofType.FAIL_PERSONAL_PRONOUN);
      Tree evdTree = matcher.getNode("prp");
      proof.setEvidenceTree(evdTree);
      phrase.addProof(proof);
      return false;
    }

    return true;
  }

예제 #6

0

파일 보기

파일: TregexPatternFactory.java 프로젝트: narain280493/Automatic-Question-Generation

 public static TregexPattern getPattern(String tregex) {
   if (instance == null) {
     instance = new TregexPatternFactory();
   }
   Map<String, TregexPattern> myMap = instance.getMap();
   TregexPattern pattern = myMap.get(tregex);
   if (pattern == null) {
     try {
       pattern = TregexPattern.compile(tregex);
       myMap.put(tregex, pattern);
     } catch (Exception e) {
       e.printStackTrace();
     }
   }
   return pattern;
 }

예제 #7

0

파일 보기

파일: TregexPattern.java 프로젝트: hans/CoreNLP

 // todo: add an option to only print each tree once, regardless.  Most useful in conjunction
 // with -w
 public void visitTree(Tree t) {
   treeNumber++;
   if (printTree) {
     pw.print(treeNumber + ":");
     pw.println("Next tree read:");
     tp.printTree(t, pw);
   }
   TregexMatcher match = p.matcher(t);
   if (printNonMatchingTrees) {
     if (match.find()) numMatches++;
     else tp.printTree(t, pw);
     return;
   }
   Tree lastMatchingRootNode = null;
   while (match.find()) {
     if (oneMatchPerRootNode) {
       if (lastMatchingRootNode == match.getMatch()) continue;
       else lastMatchingRootNode = match.getMatch();
     }
     numMatches++;
     if (printFilename && treebank instanceof DiskTreebank) {
       DiskTreebank dtb = (DiskTreebank) treebank;
       pw.print("# ");
       pw.println(dtb.getCurrentFilename());
     }
     if (printSubtreeCode) {
       pw.print(treeNumber);
       pw.print(':');
       pw.println(match.getMatch().nodeNumber(t));
     }
     if (printMatches) {
       if (reportTreeNumbers) {
         pw.print(treeNumber);
         pw.print(": ");
       }
       if (printTree) {
         pw.println("Found a full match:");
       }
       if (printWholeTree) {
         tp.printTree(t, pw);
       } else if (handles != null) {
         if (printTree) {
           pw.println("Here's the node you were interested in:");
         }
         for (String handle : handles) {
           Tree labeledNode = match.getNode(handle);
           if (labeledNode == null) {
             System.err.println(
                 "Error!!  There is no matched node \""
                     + handle
                     + "\"!  Did you specify such a label in the pattern?");
           } else {
             tp.printTree(labeledNode, pw);
           }
         }
       } else {
         tp.printTree(match.getMatch(), pw);
       }
       // pw.println();  // TreePrint already puts a blank line in
     } // end if (printMatches)
   } // end while match.find()
 } // end visitTree

예제 #8

0

파일 보기

파일: TregexPattern.java 프로젝트: hans/CoreNLP

  /**
   * Prints out all matches of a tree pattern on each tree in the path. Usage: <br>
   * <br>
   * <code>
   * java edu.stanford.nlp.trees.tregex.TregexPattern [[-TCwfosnu] [-filter] [-h &lt;node-name&gt;]]* pattern
   *  filepath   </code>
   *
   * <p>Arguments:<br>
   *
   * <ul>
   *   <li><code>pattern</code>: the tree pattern which optionally names some set of nodes (i.e.,
   *       gives it the "handle") <code>=name</code> (for some arbitrary string "name")
   *   <li><code>filepath</code>: the path to files with trees. If this is a directory, there will
   *       be recursive descent and the pattern will be run on all files beneath the specified
   *       directory.
   * </ul>
   *
   * <p>Options:<br>
   * <li><code>-C</code> suppresses printing of matches, so only the number of matches is printed.
   * <li><code>-w</code> causes the whole of a tree that matches to be printed.
   * <li><code>-f</code> causes the filename to be printed.
   * <li><code>-i &lt;filename&gt;</code> causes the pattern to be matched to be read from <code>
   *     &lt;filename&gt;</code> rather than the command line. Don't specify a pattern when this
   *     option is used.
   * <li><code>-o</code> Specifies that each tree node can be reported only once as the root of a
   *     match (by default a node will be printed once for every <em>way</em> the pattern matches).
   * <li><code>-s</code> causes trees to be printed all on one line (by default they are pretty
   *     printed).
   * <li><code>-n</code> causes the number of the tree in which the match was found to be printed
   *     before every match.
   * <li><code>-u</code> causes only the label of each matching node to be printed, not complete
   *     subtrees.
   * <li><code>-t</code> causes only the yield (terminal words) of the selected node to be printed
   *     (or the yield of the whole tree, if the <code>-w</code> option is used).
   * <li><code>-encoding &lt;charset_encoding&gt;</code> option allows specification of character
   *     encoding of trees..
   * <li><code>-h &lt;node-handle&gt;</code> If a <code>-h</code> option is given, the root tree
   *     node will not be printed. Instead, for each <code>node-handle</code> specified, the node
   *     matched and given that handle will be printed. Multiple nodes can be printed by using the
   *     <code>-h</code> option multiple times on a single command line.
   * <li><code>-hf &lt;headfinder-class-name&gt;</code> use the specified {@link HeadFinder} class
   *     to determine headship relations.
   * <li><code>-hfArg &lt;string&gt;</code> pass a string argument in to the {@link HeadFinder}
   *     class's constructor. <code>-hfArg</code> can be used multiple times to pass in multiple
   *     arguments.
   * <li><code>-trf &lt;TreeReaderFactory-class-name&gt;</code> use the specified {@link
   *     TreeReaderFactory} class to read trees from files.
   * <li><code>-v</code> print every tree that contains no matches of the specified pattern, but
   *     print no matches to the pattern.
   * <li><code>-x</code> Instead of the matched subtree, print the matched subtree's identifying
   *     number as defined in <tt>tgrep2</tt>:a unique identifier for the subtree and is in the form
   *     s:n, where s is an integer specifying the sentence number in the corpus (starting with 1),
   *     and n is an integer giving the order in which the node is encountered in a depth-first
   *     search starting with 1 at top node in the sentence tree.
   * <li><code>-extract &lt;code&gt; &lt;tree-file&gt;</code> extracts the subtree s:n specified by
   *     <tt>code</tt> from the specified <tt>tree-file</tt>. Overrides all other behavior of
   *     tregex. Can't specify multiple encodings etc. yet.
   * <li><code>-extractFile &lt;code-file&gt; &lt;tree-file&gt;</code> extracts every subtree
   *     specified by the subtree codes in <tt>code-file</tt>, which must appear exactly one per
   *     line, from the specified <tt>tree-file</tt>. Overrides all other behavior of tregex. Can't
   *     specify multiple encodings etc. yet.
   * <li><code>-filter</code> causes this to act as a filter, reading tree input from stdin
   * <li><code>-T</code> causes all trees to be printed as processed (for debugging purposes).
   *     Otherwise only matching nodes are printed.
   * <li><code>-macros &lt;filename&gt;</code> filename with macro substitutions to use. file with
   *     tab separated lines original-tab-replacement
   * </ul>
   */
  public static void main(String[] args) throws IOException {
    Timing.startTime();

    StringBuilder treePrintFormats = new StringBuilder();
    String printNonMatchingTreesOption = "-v";
    String subtreeCodeOption = "-x";
    String extractSubtreesOption = "-extract";
    String extractSubtreesFileOption = "-extractFile";
    String inputFileOption = "-i";
    String headFinderOption = "-hf";
    String headFinderArgOption = "-hfArg";
    String trfOption = "-trf";
    String headFinderClassName = null;
    String[] headFinderArgs = StringUtils.EMPTY_STRING_ARRAY;
    String treeReaderFactoryClassName = null;
    String printHandleOption = "-h";
    String markHandleOption = "-k";
    String encodingOption = "-encoding";
    String encoding = "UTF-8";
    String macroOption = "-macros";
    String macroFilename = "";
    String yieldOnly = "-t";
    String printAllTrees = "-T";
    String quietMode = "-C";
    String wholeTreeMode = "-w";
    String filenameOption = "-f";
    String oneMatchPerRootNodeMode = "-o";
    String reportTreeNumbers = "-n";
    String rootLabelOnly = "-u";
    String oneLine = "-s";
    Map<String, Integer> flagMap = Generics.newHashMap();
    flagMap.put(extractSubtreesOption, 2);
    flagMap.put(extractSubtreesFileOption, 2);
    flagMap.put(subtreeCodeOption, 0);
    flagMap.put(printNonMatchingTreesOption, 0);
    flagMap.put(encodingOption, 1);
    flagMap.put(inputFileOption, 1);
    flagMap.put(printHandleOption, 1);
    flagMap.put(markHandleOption, 2);
    flagMap.put(headFinderOption, 1);
    flagMap.put(headFinderArgOption, 1);
    flagMap.put(trfOption, 1);
    flagMap.put(macroOption, 1);
    flagMap.put(yieldOnly, 0);
    flagMap.put(quietMode, 0);
    flagMap.put(wholeTreeMode, 0);
    flagMap.put(printAllTrees, 0);
    flagMap.put(filenameOption, 0);
    flagMap.put(oneMatchPerRootNodeMode, 0);
    flagMap.put(reportTreeNumbers, 0);
    flagMap.put(rootLabelOnly, 0);
    flagMap.put(oneLine, 0);
    Map<String, String[]> argsMap = StringUtils.argsToMap(args, flagMap);
    args = argsMap.get(null);

    if (argsMap.containsKey(encodingOption)) {
      encoding = argsMap.get(encodingOption)[0];
      System.err.println("Encoding set to " + encoding);
    }
    PrintWriter errPW = new PrintWriter(new OutputStreamWriter(System.err, encoding), true);

    if (argsMap.containsKey(extractSubtreesOption)) {
      List<String> subTreeStrings =
          Collections.singletonList(argsMap.get(extractSubtreesOption)[0]);
      extractSubtrees(subTreeStrings, argsMap.get(extractSubtreesOption)[1]);
      return;
    }
    if (argsMap.containsKey(extractSubtreesFileOption)) {
      List<String> subTreeStrings =
          Arrays.asList(
              IOUtils.slurpFile(argsMap.get(extractSubtreesFileOption)[0]).split("\n|\r|\n\r"));
      extractSubtrees(subTreeStrings, argsMap.get(extractSubtreesFileOption)[0]);
      return;
    }

    if (args.length < 1) {
      errPW.println(
          "Usage: java edu.stanford.nlp.trees.tregex.TregexPattern [-T] [-C] [-w] [-f] [-o] [-n] [-s] [-filter]  [-hf class] [-trf class] [-h handle]* pattern [filepath]");
      return;
    }
    String matchString = args[0];

    if (argsMap.containsKey(macroOption)) {
      macroFilename = argsMap.get(macroOption)[0];
    }
    if (argsMap.containsKey(headFinderOption)) {
      headFinderClassName = argsMap.get(headFinderOption)[0];
      errPW.println("Using head finder " + headFinderClassName + "...");
    }
    if (argsMap.containsKey(headFinderArgOption)) {
      headFinderArgs = argsMap.get(headFinderArgOption);
    }
    if (argsMap.containsKey(trfOption)) {
      treeReaderFactoryClassName = argsMap.get(trfOption)[0];
      errPW.println("Using tree reader factory " + treeReaderFactoryClassName + "...");
    }
    if (argsMap.containsKey(printAllTrees)) {
      TRegexTreeVisitor.printTree = true;
    }
    if (argsMap.containsKey(inputFileOption)) {
      String inputFile = argsMap.get(inputFileOption)[0];
      matchString = IOUtils.slurpFile(inputFile, encoding);
      String[] newArgs = new String[args.length + 1];
      System.arraycopy(args, 0, newArgs, 1, args.length);
      args = newArgs;
    }
    if (argsMap.containsKey(quietMode)) {
      TRegexTreeVisitor.printMatches = false;
      TRegexTreeVisitor.printNumMatchesToStdOut = true;
    }
    if (argsMap.containsKey(printNonMatchingTreesOption)) {
      TRegexTreeVisitor.printNonMatchingTrees = true;
    }
    if (argsMap.containsKey(subtreeCodeOption)) {
      TRegexTreeVisitor.printSubtreeCode = true;
      TRegexTreeVisitor.printMatches = false;
    }
    if (argsMap.containsKey(wholeTreeMode)) {
      TRegexTreeVisitor.printWholeTree = true;
    }
    if (argsMap.containsKey(filenameOption)) {
      TRegexTreeVisitor.printFilename = true;
    }
    if (argsMap.containsKey(oneMatchPerRootNodeMode)) TRegexTreeVisitor.oneMatchPerRootNode = true;
    if (argsMap.containsKey(reportTreeNumbers)) TRegexTreeVisitor.reportTreeNumbers = true;
    if (argsMap.containsKey(rootLabelOnly)) {
      treePrintFormats.append(TreePrint.rootLabelOnlyFormat).append(',');
    } else if (argsMap.containsKey(oneLine)) { // display short form
      treePrintFormats.append("oneline,");
    } else if (argsMap.containsKey(yieldOnly)) {
      treePrintFormats.append("words,");
    } else {
      treePrintFormats.append("penn,");
    }

    HeadFinder hf = new CollinsHeadFinder();
    if (headFinderClassName != null) {
      Class[] hfArgClasses = new Class[headFinderArgs.length];
      for (int i = 0; i < hfArgClasses.length; i++) hfArgClasses[i] = String.class;
      try {
        hf =
            (HeadFinder)
                Class.forName(headFinderClassName)
                    .getConstructor(hfArgClasses)
                    .newInstance(
                        (Object[])
                            headFinderArgs); // cast to Object[] necessary to avoid varargs-related
        // warning.
      } catch (Exception e) {
        throw new RuntimeException("Error occurred while constructing HeadFinder: " + e);
      }
    }

    TRegexTreeVisitor.tp =
        new TreePrint(treePrintFormats.toString(), new PennTreebankLanguagePack());

    try {
      // TreePattern p = TreePattern.compile("/^S/ > S=dt $++ '' $-- ``");
      TregexPatternCompiler tpc = new TregexPatternCompiler(hf);
      Macros.addAllMacros(tpc, macroFilename, encoding);
      TregexPattern p = tpc.compile(matchString);
      errPW.println("Pattern string:\n" + p.pattern());
      errPW.println("Parsed representation:");
      p.prettyPrint(errPW);

      String[] handles = argsMap.get(printHandleOption);
      if (argsMap.containsKey("-filter")) {
        TreeReaderFactory trf = getTreeReaderFactory(treeReaderFactoryClassName);
        treebank =
            new MemoryTreebank(
                trf, encoding); // has to be in memory since we're not storing it on disk
        // read from stdin
        Reader reader = new BufferedReader(new InputStreamReader(System.in, encoding));
        ((MemoryTreebank) treebank).load(reader);
        reader.close();
      } else if (args.length == 1) {
        errPW.println("using default tree");
        TreeReader r =
            new PennTreeReader(
                new StringReader(
                    "(VP (VP (VBZ Try) (NP (NP (DT this) (NN wine)) (CC and) (NP (DT these) (NNS snails)))) (PUNCT .))"),
                new LabeledScoredTreeFactory(new StringLabelFactory()));
        Tree t = r.readTree();
        treebank = new MemoryTreebank();
        treebank.add(t);
      } else {
        int last = args.length - 1;
        errPW.println("Reading trees from file(s) " + args[last]);
        TreeReaderFactory trf = getTreeReaderFactory(treeReaderFactoryClassName);
        treebank = new DiskTreebank(trf, encoding);
        treebank.loadPath(args[last], null, true);
      }
      TRegexTreeVisitor vis = new TRegexTreeVisitor(p, handles, encoding);

      treebank.apply(vis);
      Timing.endTime();
      if (TRegexTreeVisitor.printMatches) {
        errPW.println("There were " + vis.numMatches() + " matches in total.");
      }
      if (TRegexTreeVisitor.printNumMatchesToStdOut) {
        System.out.println(vis.numMatches());
      }
    } catch (IOException e) {
      e.printStackTrace();
    } catch (TregexParseException e) {
      errPW.println("Error parsing expression: " + args[0]);
      errPW.println("Parse exception: " + e.toString());
    }
  }

예제 #9

0

파일 보기

파일: ShiftReduceParserQuery.java 프로젝트: automenta/corenlp

public class ShiftReduceParserQuery implements ParserQuery {
  Debinarizer debinarizer = new Debinarizer(false);

  List<? extends HasWord> originalSentence;
  private State initialState, finalState;
  Tree debinarized;

  boolean success;
  boolean unparsable;

  final ShiftReduceParser parser;

  List<ParserConstraint> constraints = null;

  public ShiftReduceParserQuery(ShiftReduceParser parser) {
    this.parser = parser;
  }

  @Override
  public boolean parse(List<? extends HasWord> sentence) {
    this.originalSentence = sentence;
    initialState = ShiftReduceParser.initialStateFromTaggedSentence(sentence);
    return parseInternal();
  }

  public boolean parse(Tree tree) {
    this.originalSentence = tree.yieldHasWord();
    initialState = ShiftReduceParser.initialStateFromGoldTagTree(tree);
    return parseInternal();
  }

  // TODO: we are assuming that sentence final punctuation always has
  // either . or PU as the tag.
  private static TregexPattern rearrangeFinalPunctuationTregex =
      TregexPattern.compile(
          "__ !> __ <- (__=top <- (__ <<- (/[.]|PU/=punc < /[.!?。！？]/ ?> (__=single <: =punc))))");

  private static TsurgeonPattern rearrangeFinalPunctuationTsurgeon =
      Tsurgeon.parseOperation("[move punc >-1 top] [if exists single prune single]");

  private boolean parseInternal() {
    final int maxBeamSize = Math.max(parser.op.testOptions().beamSize, 1);

    success = true;
    unparsable = false;
    PriorityQueue<State> beam =
        new PriorityQueue<>(maxBeamSize + 1, ScoredComparator.ASCENDING_COMPARATOR);
    beam.add(initialState);
    // TODO: don't construct as many PriorityQueues
    while (beam.size() > 0) {
      // System.err.println("================================================");
      // System.err.println("Current beam:");
      // System.err.println(beam);
      PriorityQueue<State> oldBeam = beam;
      beam = new PriorityQueue<>(maxBeamSize + 1, ScoredComparator.ASCENDING_COMPARATOR);
      State bestState = null;
      for (State state : oldBeam) {
        Collection<ScoredObject<Integer>> predictedTransitions =
            parser.model.findHighestScoringTransitions(state, true, maxBeamSize, constraints);
        // System.err.println("Examining state: " + state);
        for (ScoredObject<Integer> predictedTransition : predictedTransitions) {
          Transition transition = parser.model.transitionIndex.get(predictedTransition.object());
          State newState = transition.apply(state, predictedTransition.score());
          // System.err.println("  Transition: " + transition + " (" + predictedTransition.score() +
          // ")");
          if (bestState == null || bestState.score() < newState.score()) {
            bestState = newState;
          }
          beam.add(newState);
          if (beam.size() > maxBeamSize) {
            beam.poll();
          }
        }
      }
      if (beam.size() == 0) {
        // Oops, time for some fallback plan
        // This can happen with the set of constraints given by the original paper
        // For example, one particular French model had a situation where it would reach
        //   @Ssub @Ssub .
        // without a left(Ssub) transition, so finishing the parse was impossible.
        // This will probably result in a bad parse, but at least it
        // will result in some sort of parse.
        for (State state : oldBeam) {
          Transition transition = parser.model.findEmergencyTransition(state, constraints);
          if (transition != null) {
            State newState = transition.apply(state);
            if (bestState == null || bestState.score() < newState.score()) {
              bestState = newState;
            }
            beam.add(newState);
          }
        }
      }

      // bestState == null only happens when we have failed to make progress, so quit
      // If the bestState is finished, we are done
      if (bestState == null || bestState.isFinished()) {
        break;
      }
    }
    List<State> bestParses;
    if (beam.size() == 0) {
      success = false;
      unparsable = true;
      debinarized = null;
      finalState = null;
      bestParses = Collections.emptyList();
    } else {
      // TODO: filter out beam elements that aren't finished
      bestParses = Generics.newArrayList(beam);
      Collections.sort(bestParses, beam.comparator());
      Collections.reverse(bestParses);
      finalState = bestParses.get(0);
      debinarized = debinarizer.transformTree(finalState.stack.peek());
      debinarized =
          Tsurgeon.processPattern(
              rearrangeFinalPunctuationTregex, rearrangeFinalPunctuationTsurgeon, debinarized);
    }
    return success;
  }

  /** TODO: if we add anything interesting to report, we should report it here */
  @Override
  public boolean parseAndReport(List<? extends HasWord> sentence, PrintWriter pwErr) {
    boolean success = parse(sentence);
    // System.err.println(getBestTransitionSequence());
    // System.err.println(getBestBinarizedParse());
    return success;
  }

  public Tree getBestBinarizedParse() {
    return finalState.stack.peek();
  }

  public List<Transition> getBestTransitionSequence() {
    return finalState.transitions.asList();
  }

  @Override
  public double getPCFGScore() {
    return finalState.score;
  }

  @Override
  public Tree getBestParse() {
    return debinarized;
  }

  /** TODO: can we get away with not calling this PCFG? */
  @Override
  public Tree getBestPCFGParse() {
    return debinarized;
  }

  @Override
  public Tree getBestDependencyParse(boolean debinarize) {
    return null;
  }

  @Override
  public Tree getBestFactoredParse() {
    return null;
  }

  /** TODO: if this is a beam, return all equal parses */
  @Override
  public List<ScoredObject<Tree>> getBestPCFGParses() {
    ScoredObject<Tree> parse = new ScoredObject<>(debinarized, finalState.score);
    return Collections.singletonList(parse);
  }

  @Override
  public boolean hasFactoredParse() {
    return false;
  }

  /** TODO: return more if this used a beam */
  @Override
  public List<ScoredObject<Tree>> getKBestPCFGParses(int kbestPCFG) {
    ScoredObject<Tree> parse = new ScoredObject<>(debinarized, finalState.score);
    return Collections.singletonList(parse);
  }

  @Override
  public List<ScoredObject<Tree>> getKGoodFactoredParses(int kbest) {
    throw new UnsupportedOperationException();
  }

  @Override
  public KBestViterbiParser getPCFGParser() {
    // TODO: find some way to treat this as a KBestViterbiParser?
    return null;
  }

  @Override
  public KBestViterbiParser getDependencyParser() {
    return null;
  }

  @Override
  public KBestViterbiParser getFactoredParser() {
    return null;
  }

  @Override
  public void setConstraints(List<ParserConstraint> constraints) {
    this.constraints = constraints;
  }

  @Override
  public boolean saidMemMessage() {
    return false;
  }

  @Override
  public boolean parseSucceeded() {
    return success;
  }

  /** TODO: skip sentences which are too long */
  @Override
  public boolean parseSkipped() {
    return false;
  }

  @Override
  public boolean parseFallback() {
    return false;
  }

  /** TODO: add memory handling? */
  @Override
  public boolean parseNoMemory() {
    return false;
  }

  @Override
  public boolean parseUnparsable() {
    return unparsable;
  }

  @Override
  public List<? extends HasWord> originalSentence() {
    return originalSentence;
  }

  /** TODO: clearly this should be a default method in ParserQuery once Java 8 comes out */
  @Override
  public void restoreOriginalWords(Tree tree) {
    if (originalSentence == null || tree == null) {
      return;
    }
    List<Tree> leaves = tree.getLeaves();
    if (leaves.size() != originalSentence.size()) {
      throw new IllegalStateException(
          "originalWords and sentence of different sizes: "
              + originalSentence.size()
              + " vs. "
              + leaves.size()
              + "\n Orig: "
              + Sentence.listToString(originalSentence)
              + "\n Pars: "
              + Sentence.listToString(leaves));
    }
    // TODO: get rid of this cast
    Iterator<? extends Label> wordsIterator =
        (Iterator<? extends Label>) originalSentence.iterator();
    for (Tree leaf : leaves) {
      leaf.setLabel(wordsIterator.next());
    }
  }
}