Пример #1
0
 /**
  * Writes the {@code localString()} of this pattern on its own line, indented by
  * {@code indent} levels, and then does the same for every child one level deeper.
  *
  * @param pw writer that receives the output
  * @param indent nesting depth of this pattern; each level is rendered as three spaces
  */
 private void prettyPrint(PrintWriter pw, int indent) {
   // Emit one three-space unit per nesting level.
   int remaining = indent;
   while (remaining > 0) {
     pw.print("   ");
     remaining--;
   }
   pw.println(localString());

   // Children are printed recursively, one level further in.
   final int childIndent = indent + 1;
   for (SemgrexPattern subPattern : getChildren()) {
     subPattern.prettyPrint(pw, childIndent);
   }
 }
Пример #2
0
 /**
  * Parses the given string into a pattern and attaches the supplied environment to it.
  *
  * @param semgrex the pattern string
  * @param env the environment stored on the resulting pattern
  * @return a SemgrexPattern for the string.
  * @throws SemgrexParseException if the parser raises a {@code ParseException} or a
  *     {@code TokenMgrError} while reading the string; the original problem is kept as the cause
  */
 public static SemgrexPattern compile(String semgrex, Env env) {
   try {
     // A trailing newline is appended before the text is handed to the parser.
     StringReader input = new StringReader(semgrex + "\n");
     SemgrexPattern compiled = new SemgrexParser(input).Root();
     compiled.patternString = semgrex;
     compiled.env = env;
     return compiled;
   } catch (ParseException | TokenMgrError failure) {
     throw new SemgrexParseException("Error parsing semgrex pattern " + semgrex, failure);
   }
 }
Пример #3
0
  /**
   * Prints out all matches of a semgrex pattern on a file of dependencies. <br>
   * Usage:<br>
   * java edu.stanford.nlp.semgraph.semgrex.SemgrexPattern [args] <br>
   * See the help() function for a list of possible arguments to provide.
   */
  public static void main(String[] args) throws IOException {
    Map<String, Integer> flagMap = Generics.newHashMap();

    flagMap.put(PATTERN, 1);
    flagMap.put(TREE_FILE, 1);
    flagMap.put(MODE, 1);
    flagMap.put(EXTRAS, 1);
    flagMap.put(CONLLU_FILE, 1);
    flagMap.put(OUTPUT_FORMAT_OPTION, 1);

    Map<String, String[]> argsMap = StringUtils.argsToMap(args, flagMap);
    args = argsMap.get(null);

    // TODO: allow patterns to be extracted from a file
    if (!(argsMap.containsKey(PATTERN)) || argsMap.get(PATTERN).length == 0) {
      help();
      System.exit(2);
    }
    SemgrexPattern semgrex = SemgrexPattern.compile(argsMap.get(PATTERN)[0]);

    String modeString = DEFAULT_MODE;
    if (argsMap.containsKey(MODE) && argsMap.get(MODE).length > 0) {
      modeString = argsMap.get(MODE)[0].toUpperCase();
    }
    SemanticGraphFactory.Mode mode = SemanticGraphFactory.Mode.valueOf(modeString);

    String outputFormatString = DEFAULT_OUTPUT_FORMAT;
    if (argsMap.containsKey(OUTPUT_FORMAT_OPTION) && argsMap.get(OUTPUT_FORMAT_OPTION).length > 0) {
      outputFormatString = argsMap.get(OUTPUT_FORMAT_OPTION)[0].toUpperCase();
    }
    OutputFormat outputFormat = OutputFormat.valueOf(outputFormatString);

    boolean useExtras = true;
    if (argsMap.containsKey(EXTRAS) && argsMap.get(EXTRAS).length > 0) {
      useExtras = Boolean.valueOf(argsMap.get(EXTRAS)[0]);
    }

    List<SemanticGraph> graphs = Generics.newArrayList();
    // TODO: allow other sources of graphs, such as dependency files
    if (argsMap.containsKey(TREE_FILE) && argsMap.get(TREE_FILE).length > 0) {
      for (String treeFile : argsMap.get(TREE_FILE)) {
        System.err.println("Loading file " + treeFile);
        MemoryTreebank treebank = new MemoryTreebank(new TreeNormalizer());
        treebank.loadPath(treeFile);
        for (Tree tree : treebank) {
          // TODO: allow other languages... this defaults to English
          SemanticGraph graph =
              SemanticGraphFactory.makeFromTree(
                  tree,
                  mode,
                  useExtras
                      ? GrammaticalStructure.Extras.MAXIMAL
                      : GrammaticalStructure.Extras.NONE,
                  true);
          graphs.add(graph);
        }
      }
    }

    if (argsMap.containsKey(CONLLU_FILE) && argsMap.get(CONLLU_FILE).length > 0) {
      CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
      for (String conlluFile : argsMap.get(CONLLU_FILE)) {
        System.err.println("Loading file " + conlluFile);
        Iterator<SemanticGraph> it = reader.getIterator(IOUtils.readerFromString(conlluFile));

        while (it.hasNext()) {
          SemanticGraph graph = it.next();
          graphs.add(graph);
        }
      }
    }

    for (SemanticGraph graph : graphs) {
      SemgrexMatcher matcher = semgrex.matcher(graph);
      if (!(matcher.find())) {
        continue;
      }

      if (outputFormat == OutputFormat.LIST) {
        System.err.println("Matched graph:");
        System.err.println(graph.toString(SemanticGraph.OutputFormat.LIST));
        boolean found = true;
        while (found) {
          System.err.println(
              "Matches at: " + matcher.getMatch().value() + "-" + matcher.getMatch().index());
          List<String> nodeNames = Generics.newArrayList();
          nodeNames.addAll(matcher.getNodeNames());
          Collections.sort(nodeNames);
          for (String name : nodeNames) {
            System.err.println(
                "  "
                    + name
                    + ": "
                    + matcher.getNode(name).value()
                    + "-"
                    + matcher.getNode(name).index());
          }
          System.err.println();
          found = matcher.find();
        }
      } else if (outputFormat == OutputFormat.OFFSET) {
        if (graph.vertexListSorted().isEmpty()) {
          continue;
        }
        System.out.printf(
            "+%d %s%n",
            graph.vertexListSorted().get(0).get(CoreAnnotations.LineNumberAnnotation.class),
            argsMap.get(CONLLU_FILE)[0]);
      }
    }
  }