コード例 #1
0
 /**
  * Runs every tree of the treebank through {@code transformTree} followed by {@code
  * untransformTree} and reports each tree that no longer equals the skeleton copy taken before
  * the round trip.
  *
  * @param e the extender whose {@code transformTree} is applied to each tree
  * @param tb the trees to check
  * @param pw receives a report (original and round-tripped tree) for every mismatch
  */
 private static void testTransAndUntrans(
     CharacterLevelTagExtender e, Treebank tb, PrintWriter pw) {
   for (Tree current : tb) {
     // Snapshot first: the round trip below operates on the tree itself.
     final Tree snapshot = current.treeSkeletonCopy();
     e.transformTree(current);
     CharacterLevelTagExtender.untransformTree(current);
     if (current.equals(snapshot)) {
       continue; // round trip preserved the tree, nothing to report
     }
     pw.println("NOT EQUAL AFTER UNTRANSFORMATION!!!");
     pw.println();
     snapshot.pennPrint(pw);
     pw.println();
     current.pennPrint(pw);
     pw.println("------------------");
   }
 }
コード例 #2
0
ファイル: Tsurgeon.java プロジェクト: nitish11/CoreNLP
 /**
  * Prints a tree through the given printer, or the literal line {@code null} when there is no
  * tree to print.
  *
  * @param t the tree to print; may be null
  * @param tp the printer used for non-null trees
  * @param pw destination writer
  */
 private static void displayTree(Tree t, TreePrint tp, PrintWriter pw) {
   if (t != null) {
     tp.printTree(t, pw);
     return;
   }
   pw.println("null");
 }
コード例 #3
0
ファイル: TregexPattern.java プロジェクト: hans/CoreNLP
 /**
  * Writes this pattern on its own line, indented three spaces per level and prefixed with
  * {@code !} when the {@code neg} flag is set and {@code ?} when the {@code opt} flag is set,
  * then prints each child one level deeper.
  *
  * @param pw destination writer
  * @param indent nesting depth of this pattern
  */
 private void prettyPrint(PrintWriter pw, int indent) {
   int padsLeft = indent;
   while (padsLeft > 0) {
     pw.print("   ");
     padsLeft--;
   }
   if (neg) {
     pw.print('!');
   }
   if (opt) {
     pw.print('?');
   }
   pw.println(localString());
   final int childIndent = indent + 1;
   for (TregexPattern sub : getChildren()) {
     sub.prettyPrint(pw, childIndent);
   }
 }
コード例 #4
0
ファイル: Tsurgeon.java プロジェクト: nitish11/CoreNLP
  /**
   * Usage: java edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon [-s] -treeFile file-with-trees [-po
   * matching-pattern operation] operation-file-1 operation-file-2 ... operation-file-n
   *
   * <h4>Arguments:</h4>
   *
   * Each argument should be the name of a transformation file that contains a list of pattern and
   * transformation operation list pairs. That is, it is a sequence of pairs of a {@link
   * TregexPattern} pattern on one or more lines, then a blank line (empty or whitespace), then a
   * list of transformation operations one per line (as specified by <b>Legal operation syntax</b>
   * below) to apply when the pattern is matched, and then another blank line (empty or whitespace).
   * Note the need for blank lines: The code crashes if they are not present as separators (although
   * the blank line at the end of the file can be omitted). The script file can include comment
   * lines, either whole comment lines or trailing comments introduced by %, which extend to the end
   * of line. A needed percent mark can be escaped by a preceding backslash.
   *
   * <p>For example, if you want to excise an SBARQ node whenever it is the parent of an SQ node,
   * and relabel the SQ node to S, your transformation file would look like this:
   *
   * <blockquote>
   *
   * <code>
   *    SBARQ=n1 &lt; SQ=n2<br>
   *    <br>
   *    excise n1 n1<br>
   *    relabel n2 S
   * </code>
   *
   * </blockquote>
   *
   * <p>
   *
   * <h4>Options:</h4>
   *
   * <ul>
   *   <li><code>-treeFile &#60;filename&#62;</code> specify the name of the file that has the trees
   *       you want to transform.
   *   <li><code>-po &#60;matchPattern&#62; &#60;operation&#62;</code> Apply a single operation to
   *       every tree using the specified match pattern and the specified operation. Use this option
   *       when you want to quickly try the effect of one pattern/surgery combination, and are too
   *       lazy to write a transformation file.
   *   <li><code>-s</code> Print each output tree on one line (default is pretty-printing).
   *   <li><code>-m</code> For every tree that had a matching pattern, print "before" (prepended as
   *       "Operated on:") and "after" (prepended as "Result:"). Unoperated trees just pass through
   *       the transducer as usual.
   *   <li><code>-encoding X</code> Uses character set X for input and output of trees.
   *   <li><code>-macros &#60;filename&#62;</code> A file of macros to use on the tregex pattern.
   *       Macros should be one per line, with original and replacement separated by tabs.
   *   <li><code>-hf &lt;headfinder-class-name&gt;</code> use the specified {@link HeadFinder} class
   *       to determine headship relations.
   *   <li><code>-hfArg &lt;string&gt;</code> pass a string argument in to the {@link HeadFinder}
   *       class's constructor. <code>-hfArg</code> can be used multiple times to pass in multiple
   *       arguments.
   *   <li><code>-trf &lt;TreeReaderFactory-class-name&gt;</code> use the specified {@link
   *       TreeReaderFactory} class to read trees from files.
   * </ul>
   *
   * <h4>Legal operation syntax:</h4>
   *
   * <ul>
   *   <li><code>delete &#60;name&#62;</code> deletes the node and everything below it.
   *   <li><code>prune &#60;name&#62;</code> Like delete, but if, after the pruning, the parent has
   *       no children anymore, the parent is pruned too. Pruning continues to affect all ancestors
   *       until one is found with remaining children. This may result in a null tree.
   *   <li><code>excise &#60;name1&#62; &#60;name2&#62;</code> The name1 node should either dominate
   *       or be the same as the name2 node. This excises out everything from name1 to name2. All
   *       the children of name2 go into the parent of name1, where name1 was.
   *   <li><code>relabel &#60;name&#62; &#60;new-label&#62;</code> Relabels the node to have the new
   *       label. <br>
   *       There are three possible forms: <br>
   *       <code>relabel nodeX VP</code> - for changing a node label to an alphanumeric string <br>
   *       <code>relabel nodeX /''/</code> - for relabeling a node to something that isn't a valid
   *       identifier without quoting <br>
   *       <code>relabel nodeX /^VB(.*)$/verb\\/$1/</code> - for regular expression based
   *       relabeling. In this case, all matches of the regular expression against the node label
   *       are replaced with the replacement String. This has the semantics of Java/Perl's
   *       replaceAll: you may use capturing groups and put them in replacements with $n. For
   *       example, if the pattern is /foo/bar/ and the node matched is "foo", the replaceAll
   *       semantics result in "barbar". If the pattern is /^foo(.*)$/bar$1/ and node matched is
   *       "foofoo", relabel will result in "barfoo". <br>
   *       When using the regex replacement method, you can also use the sequences ={node} and
   *       %{var} in the replacement string to use captured nodes or variable strings in the
   *       replacement string. For example, if the Tregex pattern was "duck=bar" and the relabel is
   *       /foo/={bar}/, "foofoo" will be replaced with "duckduck". <br>
   *       To concatenate two nodes named in the tregex pattern, for example, you can use the
   *       pattern /^.*$/={foo}={bar}/. Note that the ^.*$ is necessary to make sure the regex
   *       pattern only matches and replaces once on the entire node name. <br>
   *       To get an "=" or a "%" in the replacement, use \ escaping. Also, as in the example you
   *       can escape a slash in the middle of the second and third forms with \\/ and \\\\. <br>
   *   <li><code>insert &#60;name&#62; &#60;position&#62;</code> or <code>
   *       insert &lt;tree&gt; &#60;position&#62;</code> inserts the named node or tree into the
   *       position specified.
   *   <li><code>move &#60;name&#62; &#60;position&#62;</code> moves the named node into the
   *       specified position.
   *       <p>Right now the only ways to specify position are:
   *       <p><code>$+ &#60;name&#62;</code> the left sister of the named node<br>
   *       <code>$- &#60;name&#62;</code> the right sister of the named node<br>
   *       <code>&gt;i &#60;name&#62;</code> the i_th daughter of the named node<br>
   *       <code>&gt;-i &#60;name&#62;</code> the i_th daughter, counting from the right, of the
   *       named node.
   *   <li><code>replace &#60;name1&#62; &#60;name2&#62;</code> deletes name1 and inserts a copy of
   *       name2 in its place.
   *   <li><code>replace &#60;name&#62; &#60;tree&#62; &#60;tree2&#62;...</code> deletes name and
   *       inserts the new tree(s) in its place. If more than one replacement tree is given, each of
   *       the new subtrees will be added in order where the old tree was. Multiple subtrees at the
   *       root is an illegal operation and will throw an exception.
   *   <li>{@code createSubtree <new-label> <name1> [<name2>]} Create a subtree out of all the nodes
   *       from {@code <name1>} through {@code <name2>} and puts the new subtree where that span
   *       used to be. To limit the operation to just one node, elide {@code <name2>}.
   *   <li><code>adjoin &#60;auxiliary_tree&#62; &lt;name&gt;</code> Adjoins the specified auxiliary
   *       tree into the named node. The daughters of the target node will become the daughters of
   *       the foot of the auxiliary tree.
   *   <li><code>adjoinH &#60;auxiliary_tree&#62; &lt;name&gt;</code> Similar to adjoin, but
   *       preserves the target node and makes it the root of &lt;tree&gt;. (It is still accessible
   *       as <code>name</code>. The root of the auxiliary tree is ignored.)
   *   <li><code>adjoinF &#60;auxiliary_tree&#62; &lt;name&gt;</code> Similar to adjoin, but
   *       preserves the target node and makes it the foot of &lt;tree&gt;. (It is still accessible
   *       as <code>name</code>, and retains its status as parent of its children. The root of the
   *       auxiliary tree is ignored.)
   *   <li><code>coindex &#60;name1&#62; &#60;name2&#62; ... &#60;nameM&#62; </code> Puts a (Penn
   *       Treebank style) coindexation suffix of the form "-N" on each of nodes name_1 through
   *       name_m. The value of N will be automatically generated in reference to the existing
   *       coindexations in the tree, so that there is never an accidental clash of indices across
   *       things that are not meant to be coindexed.
   * </ul>
   *
   * <p>In the context of <code>adjoin</code>, <code>adjoinH</code>, and <code>adjoinF</code>, an
   * auxiliary tree is a tree in Penn Treebank format with <code>@</code> on exactly one of the
   * leaves denoting the foot of the tree. The operations which use the foot use the labeled node.
   * For example: <br>
   * Tsurgeon: <code>adjoin (FOO (BAR@)) foo</code> <br>
   * Tregex: <code>B=foo</code> <br>
   * Input: <code>(A (B 1 2))</code> Output: <code>(A (FOO (BAR 1 2)))</code>
   *
   * <p>Tsurgeon applies the same operation to the same tree for as long as the given tregex
   * operation matches. This means that infinite loops are very easy to cause. One common situation
   * where this comes up is an insert operation, which will repeat infinitely many times unless you
   * add an expression to the tregex that matches against the inserted pattern. For example, this
   * pattern will infinite loop:
   *
   * <blockquote>
   *
   * <code>
   *   TregexPattern tregex = TregexPattern.compile("S=node &lt;&lt; NP"); <br>
   *   TsurgeonPattern tsurgeon = Tsurgeon.parseOperation("insert (NP foo) &gt;-1 node");
   * </code>
   *
   * </blockquote>
   *
   * This pattern, though, will terminate:
   *
   * <blockquote>
   *
   * <code>
   *   TregexPattern tregex = TregexPattern.compile("S=node &lt;&lt; NP !&lt;&lt; foo"); <br>
   *   TsurgeonPattern tsurgeon = Tsurgeon.parseOperation("insert (NP foo) &gt;-1 node");
   * </code>
   *
   * </blockquote>
   *
   * <p>Tsurgeon has (very) limited support for conditional statements. If a pattern is prefaced
   * with <code>if exists &lt;name&gt;</code>, the rest of the pattern will only execute if the
   * named node was found in the corresponding TregexMatcher.
   *
   * @param args a list of names of files each of which contains a single tregex matching pattern
   *     plus a list, one per line, of transformation operations to apply to the matched pattern.
   * @throws Exception If an I/O or pattern syntax error occurs
   */
  public static void main(String[] args) throws Exception {
    String headFinderClassName = null;
    String headFinderOption = "-hf";
    String[] headFinderArgs = null;
    String headFinderArgOption = "-hfArg";
    String encoding = "UTF-8";
    String encodingOption = "-encoding";
    if (args.length == 0) {
      System.err.println(
          "Usage: java edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon [-s] -treeFile <file-with-trees> [-po <matching-pattern> <operation>] <operation-file-1> <operation-file-2> ... <operation-file-n>");
      System.exit(0);
    }
    String treePrintFormats;
    String singleLineOption = "-s";
    String verboseOption = "-v";
    String matchedOption =
        "-m"; // if set, then print original form of trees that are matched & thus operated on
    String patternOperationOption = "-po";
    String treeFileOption = "-treeFile";
    String trfOption = "-trf";
    String macroOption = "-macros";
    String macroFilename = "";

    // Declare how many arguments each option takes before splitting the command line.
    Map<String, Integer> flagMap = Generics.newHashMap();
    flagMap.put(patternOperationOption, 2);
    flagMap.put(treeFileOption, 1);
    flagMap.put(trfOption, 1);
    flagMap.put(singleLineOption, 0);
    flagMap.put(encodingOption, 1);
    flagMap.put(headFinderOption, 1);
    flagMap.put(macroOption, 1);
    // These three options are read below but were previously left unregistered. -hfArg is a
    // repeatable single-argument option; -v and -m are plain switches and must not absorb the
    // operation-file names that may follow them.
    // NOTE(review): how argsToMap treats unregistered flags is not visible here -- confirm
    // against StringUtils.argsToMap; explicit arities are correct under either reading.
    flagMap.put(headFinderArgOption, 1);
    flagMap.put(verboseOption, 0);
    flagMap.put(matchedOption, 0);
    Map<String, String[]> argsMap = StringUtils.argsToMap(args, flagMap);
    // The null key holds the arguments that are not attached to any option (operation files).
    args = argsMap.get(null);

    if (argsMap.containsKey(headFinderOption)) {
      headFinderClassName = argsMap.get(headFinderOption)[0];
    }
    if (argsMap.containsKey(headFinderArgOption)) {
      headFinderArgs = argsMap.get(headFinderArgOption);
    }
    if (argsMap.containsKey(verboseOption)) {
      verbose = true;
    }
    if (argsMap.containsKey(singleLineOption)) {
      treePrintFormats = "oneline,";
    } else {
      treePrintFormats = "penn,";
    }
    if (argsMap.containsKey(encodingOption)) {
      encoding = argsMap.get(encodingOption)[0];
    }
    if (argsMap.containsKey(macroOption)) {
      macroFilename = argsMap.get(macroOption)[0];
    }
    // Loop-invariant: decided once by the command line.
    boolean printMatched = argsMap.containsKey(matchedOption);

    TreePrint tp = new TreePrint(treePrintFormats, new PennTreebankLanguagePack());
    PrintWriter pwOut = new PrintWriter(new OutputStreamWriter(System.out, encoding), true);

    TreeReaderFactory trf;
    if (argsMap.containsKey(trfOption)) {
      String trfClass = argsMap.get(trfOption)[0];
      trf = ReflectionLoading.loadByReflection(trfClass);
    } else {
      trf = new TregexPattern.TRegexTreeReaderFactory();
    }

    Treebank trees = new DiskTreebank(trf, encoding);
    if (argsMap.containsKey(treeFileOption)) {
      trees.loadPath(argsMap.get(treeFileOption)[0]);
    }
    List<Pair<TregexPattern, TsurgeonPattern>> ops =
        new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();

    TregexPatternCompiler compiler;
    if (headFinderClassName == null) {
      compiler = new TregexPatternCompiler();
    } else {
      HeadFinder hf;
      if (headFinderArgs == null) {
        hf = ReflectionLoading.loadByReflection(headFinderClassName);
      } else {
        hf = ReflectionLoading.loadByReflection(headFinderClassName, (Object[]) headFinderArgs);
      }
      compiler = new TregexPatternCompiler(hf);
    }
    Macros.addAllMacros(compiler, macroFilename, encoding);

    if (argsMap.containsKey(patternOperationOption)) {
      // -po: a single pattern/operation pair from the command line; operation files are ignored.
      String[] patternAndOperation = argsMap.get(patternOperationOption);
      TregexPattern matchPattern = compiler.compile(patternAndOperation[0]);
      TsurgeonPattern p = parseOperation(patternAndOperation[1]);
      ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
    } else {
      for (String arg : args) {
        List<Pair<TregexPattern, TsurgeonPattern>> pairs =
            getOperationsFromFile(arg, encoding, compiler);
        for (Pair<TregexPattern, TsurgeonPattern> pair : pairs) {
          if (verbose) {
            System.err.println(pair.second());
          }
          ops.add(pair);
        }
      }
    }

    for (Tree t : trees) {
      // Copy before processing so the "Operated on:" output can show the pre-surgery tree.
      Tree original = t.deepCopy();
      Tree result = processPatternsOnTree(ops, t);
      if (printMatched && matchedOnTree) {
        pwOut.println("Operated on: ");
        displayTree(original, tp, pwOut);
        pwOut.println("Result: ");
      }
      displayTree(result, tp, pwOut);
    }
    // Auto-flush only fires on println; flush explicitly so output written through other
    // PrintWriter methods is not left in the buffer when the JVM exits.
    pwOut.flush();
  }
コード例 #5
0
  /**
   * For testing -- CURRENTLY BROKEN!!!
   *
   * @param args exactly three arguments: treebankPath, trainNums, testNums
   * @throws IOException if reading the treebank or writing the output fails
   */
  public static void main(String[] args) throws IOException {
    if (args.length != 3) {
      throw new RuntimeException("args: treebankPath trainNums testNums");
    }

    ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams();
    ctpp.charTags = true;
    // TODO: these options are getting clobbered by reading in the
    // parser object (unless it's a text file parser?)
    Options op = new Options(ctpp);
    op.doDep = false;
    op.testOptions.maxLength = 90;

    LexicalizedParser lp;
    try {
      FileFilter trainFilt = new NumberRangesFileFilter(args[1], false);

      lp = LexicalizedParser.trainFromTreebank(args[0], trainFilt, op);
      try {
        String filename = "chineseCharTagPCFG.ser.gz";
        System.err.println("Writing parser in serialized format to file " + filename + ' ');
        System.err.flush();
        ObjectOutputStream out = IOUtils.writeStreamFromString(filename);
        try {
          out.writeObject(lp);
        } finally {
          // Release the file handle even when serialization fails part-way.
          out.close();
        }
        System.err.println("done.");
      } catch (IOException ioe) {
        // Best effort: failing to save the model does not stop the evaluation below.
        ioe.printStackTrace();
      }
    } catch (IllegalArgumentException e) {
      // NOTE(review): presumably args[1] was not a valid number range, so it is retried as the
      // path of an already-trained model -- confirm against NumberRangesFileFilter.
      lp = LexicalizedParser.loadModel(args[1], op);
    }

    FileFilter testFilt = new NumberRangesFileFilter(args[2], false);
    MemoryTreebank testTreebank = ctpp.memoryTreebank();
    testTreebank.loadPath(new File(args[0]), testFilt);
    WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
    WordCatEqualityChecker eqcheck = new WordCatEqualityChecker();
    EquivalenceClassEval eval = new EquivalenceClassEval(eqclass, eqcheck);
    PrintWriter pw =
        new PrintWriter(new OutputStreamWriter(new FileOutputStream("out.chi"), "GB18030"), true);
    try {
      //    System.out.println("Preterminals:" + preterminals);
      System.out.println("Testing...");
      for (Tree gold : testTreebank) {
        Tree tree;
        try {
          tree = lp.parseTree(gold.yieldHasWord());
          if (tree == null) {
            System.out.println("Failed to parse " + gold.yieldHasWord());
            continue;
          }
        } catch (Exception e) {
          // Skip sentences the parser chokes on; keep evaluating the rest.
          e.printStackTrace();
          continue;
        }
        gold = gold.firstChild();
        pw.println(Sentence.listToString(gold.preTerminalYield()));
        pw.println(Sentence.listToString(gold.yield()));
        gold.pennPrint(pw);

        pw.println(tree.preTerminalYield());
        pw.println(tree.yield());
        tree.pennPrint(pw);
        //      Collection allBrackets = WordCatConstituent.allBrackets(tree);
        //      Collection goldBrackets = WordCatConstituent.allBrackets(gold);
        //      eval.eval(allBrackets, goldBrackets);
        eval.displayLast();
      }
    } finally {
      // Flush and close out.chi whether or not the evaluation loop completed.
      pw.close();
    }
    System.out.println();
    System.out.println();
    eval.display();
  }
コード例 #6
0
ファイル: TregexPattern.java プロジェクト: hans/CoreNLP
 // todo: add an option to only print each tree once, regardless.  Most useful in conjunction
 // with -w
 /**
  * Runs the pattern {@code p} over one tree and writes whatever the static print switches ask
  * for to {@code pw}, increasing {@code treeNumber} once per call and {@code numMatches} once
  * per counted match.
  *
  * @param t the tree to search
  */
 public void visitTree(Tree t) {
   treeNumber++;
   if (printTree) {
     pw.print(treeNumber + ":");
     pw.println("Next tree read:");
     tp.printTree(t, pw);
   }

   TregexMatcher matcher = p.matcher(t);

   // Non-matching mode: a tree is either counted (it matched) or printed (it did not).
   if (printNonMatchingTrees) {
     if (matcher.find()) {
       numMatches++;
     } else {
       tp.printTree(t, pw);
     }
     return;
   }

   Tree previousRoot = null;
   while (matcher.find()) {
     if (oneMatchPerRootNode) {
       // Identity comparison: skip further matches rooted at the very same node.
       if (previousRoot == matcher.getMatch()) {
         continue;
       }
       previousRoot = matcher.getMatch();
     }
     numMatches++;

     if (printFilename && treebank instanceof DiskTreebank) {
       pw.print("# ");
       pw.println(((DiskTreebank) treebank).getCurrentFilename());
     }
     if (printSubtreeCode) {
       pw.print(treeNumber);
       pw.print(':');
       pw.println(matcher.getMatch().nodeNumber(t));
     }
     if (!printMatches) {
       continue;
     }

     if (reportTreeNumbers) {
       pw.print(treeNumber);
       pw.print(": ");
     }
     if (printTree) {
       pw.println("Found a full match:");
     }
     if (printWholeTree) {
       tp.printTree(t, pw);
       continue;
     }
     if (handles == null) {
       tp.printTree(matcher.getMatch(), pw);
       continue;
     }

     // Handles were requested: print each named node instead of the match root.
     if (printTree) {
       pw.println("Here's the node you were interested in:");
     }
     for (String handle : handles) {
       Tree named = matcher.getNode(handle);
       if (named != null) {
         tp.printTree(named, pw);
       } else {
         System.err.println(
             "Error!!  There is no matched node \""
                 + handle
                 + "\"!  Did you specify such a label in the pattern?");
       }
     }
     // No extra blank line here: TreePrint already puts a blank line in.
   }
 }
コード例 #7
0
ファイル: TregexPattern.java プロジェクト: hans/CoreNLP
  /**
   * Prints out all matches of a tree pattern on each tree in the path. Usage: <br>
   * <br>
   * <code>
   * java edu.stanford.nlp.trees.tregex.TregexPattern [[-TCwfosnu] [-filter] [-h &lt;node-name&gt;]]* pattern
   *  filepath   </code>
   *
   * <p>Arguments:<br>
   *
   * <ul>
   *   <li><code>pattern</code>: the tree pattern which optionally names some set of nodes (i.e.,
   *       gives it the "handle") <code>=name</code> (for some arbitrary string "name")
   *   <li><code>filepath</code>: the path to files with trees. If this is a directory, there will
   *       be recursive descent and the pattern will be run on all files beneath the specified
   *       directory.
   * </ul>
   *
   * <p>Options:<br>
   *
   * <ul>
   * <li><code>-C</code> suppresses printing of matches, so only the number of matches is printed.
   * <li><code>-w</code> causes the whole of a tree that matches to be printed.
   * <li><code>-f</code> causes the filename to be printed.
   * <li><code>-i &lt;filename&gt;</code> causes the pattern to be matched to be read from <code>
   *     &lt;filename&gt;</code> rather than the command line. Don't specify a pattern when this
   *     option is used.
   * <li><code>-o</code> Specifies that each tree node can be reported only once as the root of a
   *     match (by default a node will be printed once for every <em>way</em> the pattern matches).
   * <li><code>-s</code> causes trees to be printed all on one line (by default they are pretty
   *     printed).
   * <li><code>-n</code> causes the number of the tree in which the match was found to be printed
   *     before every match.
   * <li><code>-u</code> causes only the label of each matching node to be printed, not complete
   *     subtrees.
   * <li><code>-t</code> causes only the yield (terminal words) of the selected node to be printed
   *     (or the yield of the whole tree, if the <code>-w</code> option is used).
   * <li><code>-encoding &lt;charset_encoding&gt;</code> option allows specification of character
   *     encoding of trees.
   * <li><code>-h &lt;node-handle&gt;</code> If a <code>-h</code> option is given, the root tree
   *     node will not be printed. Instead, for each <code>node-handle</code> specified, the node
   *     matched and given that handle will be printed. Multiple nodes can be printed by using the
   *     <code>-h</code> option multiple times on a single command line.
   * <li><code>-hf &lt;headfinder-class-name&gt;</code> use the specified {@link HeadFinder} class
   *     to determine headship relations.
   * <li><code>-hfArg &lt;string&gt;</code> pass a string argument in to the {@link HeadFinder}
   *     class's constructor. <code>-hfArg</code> can be used multiple times to pass in multiple
   *     arguments.
   * <li><code>-trf &lt;TreeReaderFactory-class-name&gt;</code> use the specified {@link
   *     TreeReaderFactory} class to read trees from files.
   * <li><code>-v</code> print every tree that contains no matches of the specified pattern, but
   *     print no matches to the pattern.
   * <li><code>-x</code> Instead of the matched subtree, print the matched subtree's identifying
   *     number as defined in <tt>tgrep2</tt>:a unique identifier for the subtree and is in the form
   *     s:n, where s is an integer specifying the sentence number in the corpus (starting with 1),
   *     and n is an integer giving the order in which the node is encountered in a depth-first
   *     search starting with 1 at top node in the sentence tree.
   * <li><code>-extract &lt;code&gt; &lt;tree-file&gt;</code> extracts the subtree s:n specified by
   *     <tt>code</tt> from the specified <tt>tree-file</tt>. Overrides all other behavior of
   *     tregex. Can't specify multiple encodings etc. yet.
   * <li><code>-extractFile &lt;code-file&gt; &lt;tree-file&gt;</code> extracts every subtree
   *     specified by the subtree codes in <tt>code-file</tt>, which must appear exactly one per
   *     line, from the specified <tt>tree-file</tt>. Overrides all other behavior of tregex. Can't
   *     specify multiple encodings etc. yet.
   * <li><code>-filter</code> causes this to act as a filter, reading tree input from stdin
   * <li><code>-T</code> causes all trees to be printed as processed (for debugging purposes).
   *     Otherwise only matching nodes are printed.
   * <li><code>-macros &lt;filename&gt;</code> filename with macro substitutions to use. file with
   *     tab separated lines original-tab-replacement
   * </ul>
   */
  public static void main(String[] args) throws IOException {
    Timing.startTime();

    StringBuilder treePrintFormats = new StringBuilder();
    String printNonMatchingTreesOption = "-v";
    String subtreeCodeOption = "-x";
    String extractSubtreesOption = "-extract";
    String extractSubtreesFileOption = "-extractFile";
    String inputFileOption = "-i";
    String headFinderOption = "-hf";
    String headFinderArgOption = "-hfArg";
    String trfOption = "-trf";
    String headFinderClassName = null;
    String[] headFinderArgs = StringUtils.EMPTY_STRING_ARRAY;
    String treeReaderFactoryClassName = null;
    String printHandleOption = "-h";
    String markHandleOption = "-k";
    String encodingOption = "-encoding";
    String encoding = "UTF-8";
    String macroOption = "-macros";
    String macroFilename = "";
    String yieldOnly = "-t";
    String printAllTrees = "-T";
    String quietMode = "-C";
    String wholeTreeMode = "-w";
    String filenameOption = "-f";
    String oneMatchPerRootNodeMode = "-o";
    String reportTreeNumbers = "-n";
    String rootLabelOnly = "-u";
    String oneLine = "-s";
    String filterOption = "-filter";

    // Declare how many arguments each option takes before splitting the command line.
    Map<String, Integer> flagMap = Generics.newHashMap();
    flagMap.put(extractSubtreesOption, 2);
    flagMap.put(extractSubtreesFileOption, 2);
    flagMap.put(subtreeCodeOption, 0);
    flagMap.put(printNonMatchingTreesOption, 0);
    flagMap.put(encodingOption, 1);
    flagMap.put(inputFileOption, 1);
    flagMap.put(printHandleOption, 1);
    flagMap.put(markHandleOption, 2);
    flagMap.put(headFinderOption, 1);
    flagMap.put(headFinderArgOption, 1);
    flagMap.put(trfOption, 1);
    flagMap.put(macroOption, 1);
    flagMap.put(yieldOnly, 0);
    flagMap.put(quietMode, 0);
    flagMap.put(wholeTreeMode, 0);
    flagMap.put(printAllTrees, 0);
    flagMap.put(filenameOption, 0);
    flagMap.put(oneMatchPerRootNodeMode, 0);
    flagMap.put(reportTreeNumbers, 0);
    flagMap.put(rootLabelOnly, 0);
    flagMap.put(oneLine, 0);
    // -filter is a plain switch; register it so it cannot absorb the pattern that follows it.
    // NOTE(review): how argsToMap treats unregistered flags is not visible here -- confirm
    // against StringUtils.argsToMap; an explicit arity of 0 is correct under either reading.
    flagMap.put(filterOption, 0);
    Map<String, String[]> argsMap = StringUtils.argsToMap(args, flagMap);
    // The null key holds the arguments not attached to any option: [pattern] [filepath].
    args = argsMap.get(null);

    if (argsMap.containsKey(encodingOption)) {
      encoding = argsMap.get(encodingOption)[0];
      System.err.println("Encoding set to " + encoding);
    }
    PrintWriter errPW = new PrintWriter(new OutputStreamWriter(System.err, encoding), true);

    // The two extraction modes override all other behavior.
    if (argsMap.containsKey(extractSubtreesOption)) {
      String[] extractArgs = argsMap.get(extractSubtreesOption);
      List<String> subTreeStrings = Collections.singletonList(extractArgs[0]);
      extractSubtrees(subTreeStrings, extractArgs[1]);
      return;
    }
    if (argsMap.containsKey(extractSubtreesFileOption)) {
      // -extractFile <code-file> <tree-file>: the codes come from [0], the trees from [1].
      String[] extractFileArgs = argsMap.get(extractSubtreesFileOption);
      // Try \r\n first so Windows line endings do not produce empty codes between \r and \n.
      List<String> subTreeStrings =
          Arrays.asList(IOUtils.slurpFile(extractFileArgs[0]).split("\r\n|\n|\r"));
      extractSubtrees(subTreeStrings, extractFileArgs[1]);
      return;
    }

    // With -i the pattern comes from a file, so no positional pattern argument is required.
    boolean patternFromFile = argsMap.containsKey(inputFileOption);
    if (args.length < 1 && !patternFromFile) {
      errPW.println(
          "Usage: java edu.stanford.nlp.trees.tregex.TregexPattern [-T] [-C] [-w] [-f] [-o] [-n] [-s] [-filter]  [-hf class] [-trf class] [-h handle]* pattern [filepath]");
      return;
    }
    String matchString = patternFromFile ? null : args[0];

    if (argsMap.containsKey(macroOption)) {
      macroFilename = argsMap.get(macroOption)[0];
    }
    if (argsMap.containsKey(headFinderOption)) {
      headFinderClassName = argsMap.get(headFinderOption)[0];
      errPW.println("Using head finder " + headFinderClassName + "...");
    }
    if (argsMap.containsKey(headFinderArgOption)) {
      headFinderArgs = argsMap.get(headFinderArgOption);
    }
    if (argsMap.containsKey(trfOption)) {
      treeReaderFactoryClassName = argsMap.get(trfOption)[0];
      errPW.println("Using tree reader factory " + treeReaderFactoryClassName + "...");
    }
    if (argsMap.containsKey(printAllTrees)) {
      TRegexTreeVisitor.printTree = true;
    }
    if (patternFromFile) {
      String inputFile = argsMap.get(inputFileOption)[0];
      matchString = IOUtils.slurpFile(inputFile, encoding);
      // Shift the positional arguments right by one so that the later code, which expects
      // args[0] to be the pattern slot and args[last] the file path, still lines up.
      String[] newArgs = new String[args.length + 1];
      System.arraycopy(args, 0, newArgs, 1, args.length);
      args = newArgs;
    }
    if (argsMap.containsKey(quietMode)) {
      TRegexTreeVisitor.printMatches = false;
      TRegexTreeVisitor.printNumMatchesToStdOut = true;
    }
    if (argsMap.containsKey(printNonMatchingTreesOption)) {
      TRegexTreeVisitor.printNonMatchingTrees = true;
    }
    if (argsMap.containsKey(subtreeCodeOption)) {
      TRegexTreeVisitor.printSubtreeCode = true;
      TRegexTreeVisitor.printMatches = false;
    }
    if (argsMap.containsKey(wholeTreeMode)) {
      TRegexTreeVisitor.printWholeTree = true;
    }
    if (argsMap.containsKey(filenameOption)) {
      TRegexTreeVisitor.printFilename = true;
    }
    if (argsMap.containsKey(oneMatchPerRootNodeMode)) {
      TRegexTreeVisitor.oneMatchPerRootNode = true;
    }
    if (argsMap.containsKey(reportTreeNumbers)) {
      TRegexTreeVisitor.reportTreeNumbers = true;
    }
    // Exactly one output format is chosen; -u wins over -s, which wins over -t.
    if (argsMap.containsKey(rootLabelOnly)) {
      treePrintFormats.append(TreePrint.rootLabelOnlyFormat).append(',');
    } else if (argsMap.containsKey(oneLine)) { // display short form
      treePrintFormats.append("oneline,");
    } else if (argsMap.containsKey(yieldOnly)) {
      treePrintFormats.append("words,");
    } else {
      treePrintFormats.append("penn,");
    }

    HeadFinder hf = new CollinsHeadFinder();
    if (headFinderClassName != null) {
      // Look up the constructor taking one String per -hfArg value.
      Class<?>[] hfArgClasses = new Class<?>[headFinderArgs.length];
      Arrays.fill(hfArgClasses, String.class);
      try {
        // cast to Object[] necessary to avoid varargs-related warning.
        hf =
            (HeadFinder)
                Class.forName(headFinderClassName)
                    .getConstructor(hfArgClasses)
                    .newInstance((Object[]) headFinderArgs);
      } catch (Exception e) {
        // Keep the original exception as the cause so its stack trace is not lost.
        throw new RuntimeException("Error occurred while constructing HeadFinder: " + e, e);
      }
    }

    TRegexTreeVisitor.tp =
        new TreePrint(treePrintFormats.toString(), new PennTreebankLanguagePack());

    try {
      // TreePattern p = TreePattern.compile("/^S/ > S=dt $++ '' $-- ``");
      TregexPatternCompiler tpc = new TregexPatternCompiler(hf);
      Macros.addAllMacros(tpc, macroFilename, encoding);
      TregexPattern p = tpc.compile(matchString);
      errPW.println("Pattern string:\n" + p.pattern());
      errPW.println("Parsed representation:");
      p.prettyPrint(errPW);

      String[] handles = argsMap.get(printHandleOption);
      if (argsMap.containsKey(filterOption)) {
        TreeReaderFactory trf = getTreeReaderFactory(treeReaderFactoryClassName);
        treebank =
            new MemoryTreebank(
                trf, encoding); // has to be in memory since we're not storing it on disk
        // read from stdin
        Reader reader = new BufferedReader(new InputStreamReader(System.in, encoding));
        try {
          ((MemoryTreebank) treebank).load(reader);
        } finally {
          reader.close();
        }
      } else if (args.length == 1) {
        // Only the pattern slot is present: fall back to a built-in example tree.
        errPW.println("using default tree");
        TreeReader r =
            new PennTreeReader(
                new StringReader(
                    "(VP (VP (VBZ Try) (NP (NP (DT this) (NN wine)) (CC and) (NP (DT these) (NNS snails)))) (PUNCT .))"),
                new LabeledScoredTreeFactory(new StringLabelFactory()));
        Tree t = r.readTree();
        treebank = new MemoryTreebank();
        treebank.add(t);
      } else {
        int last = args.length - 1;
        errPW.println("Reading trees from file(s) " + args[last]);
        TreeReaderFactory trf = getTreeReaderFactory(treeReaderFactoryClassName);
        treebank = new DiskTreebank(trf, encoding);
        treebank.loadPath(args[last], null, true);
      }
      TRegexTreeVisitor vis = new TRegexTreeVisitor(p, handles, encoding);

      treebank.apply(vis);
      Timing.endTime();
      if (TRegexTreeVisitor.printMatches) {
        errPW.println("There were " + vis.numMatches() + " matches in total.");
      }
      if (TRegexTreeVisitor.printNumMatchesToStdOut) {
        System.out.println(vis.numMatches());
      }
    } catch (IOException e) {
      e.printStackTrace();
    } catch (TregexParseException e) {
      // Report the pattern actually compiled; args[0] is an empty slot when -i was used.
      errPW.println("Error parsing expression: " + matchString);
      errPW.println("Parse exception: " + e.toString());
    }
  }