예제 #1
0
 /**
  * Writes this pattern, followed recursively by each of its children, to {@code pw}. Every
  * pattern gets its own line, prefixed with one three-space indent unit per nesting level.
  *
  * @param pw writer that receives the output
  * @param indent number of indent units to print before this pattern's own line; children are
  *     printed with {@code indent + 1}
  */
 private void prettyPrint(PrintWriter pw, int indent) {
   // Emit the leading indentation one unit at a time.
   int remaining = indent;
   while (remaining > 0) {
     pw.print("   ");
     remaining--;
   }
   pw.println(localString());

   final int childDepth = indent + 1;
   for (SemgrexPattern nested : getChildren()) {
     nested.prettyPrint(pw, childDepth);
   }
 }
예제 #2
0
 /**
  * Parses a semgrex expression into a {@link SemgrexPattern}.
  *
  * <p>A newline is appended to the text before it is handed to the parser. The returned pattern
  * records both {@code env} and the original, unmodified {@code semgrex} text.
  *
  * @param semgrex the pattern string
  * @param env the environment stored on the resulting pattern
  * @return a SemgrexPattern for the string.
  * @throws SemgrexParseException if the parser raises a {@code ParseException} or a {@code
  *     TokenMgrError}; the original failure is passed to the exception's constructor
  */
 public static SemgrexPattern compile(String semgrex, Env env) {
   SemgrexPattern compiled;
   try {
     compiled = new SemgrexParser(new StringReader(semgrex + "\n")).Root();
   } catch (TokenMgrError lexerFailure) {
     throw new SemgrexParseException("Error parsing semgrex pattern " + semgrex, lexerFailure);
   } catch (ParseException parserFailure) {
     throw new SemgrexParseException("Error parsing semgrex pattern " + semgrex, parserFailure);
   }

   // Parsing succeeded: attach the caller's environment and remember the source text.
   compiled.env = env;
   compiled.patternString = semgrex;
   return compiled;
 }
예제 #3
0
 /**
  * Builds the textual form of this node pattern.
  *
  * <p>The output is assembled in this order: {@code '!'} if the pattern is negated, {@code '?'}
  * if it is optional, a space, the relation followed by a space (when a relation is set), an
  * optional opening parenthesis, {@code '!'} if the description is negated, the description
  * string, {@code "=name"} when a name is set, and finally, if requested, a space plus the child
  * rendered via {@code child.toString(false)} and the matching closing parenthesis.
  *
  * @param hasPrecedence when {@code false} and a child is rendered, the description and child
  *     are wrapped in parentheses
  * @param addChild whether the child pattern (if there is one) is included in the output
  * @return the string form of this pattern
  */
 public String toString(boolean hasPrecedence, boolean addChild) {
   StringBuilder out = new StringBuilder();

   // Modifier prefix, then the relation leading to this node.
   if (isNegated()) {
     out.append('!');
   }
   if (isOptional()) {
     out.append('?');
   }
   out.append(' ');
   if (reln != null) {
     out.append(reln.toString()).append(' ');
   }

   final boolean renderChild = addChild && child != null;
   final boolean wrapInParens = renderChild && !hasPrecedence;

   if (wrapInParens) {
     out.append('(');
   }
   if (negDesc) {
     out.append('!');
   }
   out.append(descString);
   if (name != null) {
     out.append('=').append(name);
   }

   if (!renderChild) {
     return out.toString();
   }

   out.append(' ').append(child.toString(false));
   if (wrapInParens) {
     out.append(')');
   }
   return out.toString();
 }
 /**
  * Verifies that at least one of the {@code VP_PATTERNS} of a fresh {@link
  * RelationTripleSegmenter} matches the graph built from the tab-separated fragment "replaced
  * with a different type of filter", whose first token ("replaced") has head 0.
  */
 public void testVPOnlyReplacedWith() {
   String conll =
       "1\treplaced\t0\tconj:and\tVBD\n"
           + "2\twith\t5\tcase\tIN\n"
           + "3\ta\t5\tdet\tDT\n"
           + "4\tdifferent\t5\tamod\tJJ\n"
           + "5\ttype\t1\tnmod:with\tNN\n"
           + "6\tof\t7\tcase\tIN\n"
           + "7\tfilter\t5\tnmod:of\tNN\n";
   SemanticGraph graph = mkTree(conll).first;

   // Positive case: every pattern is tried, and a single hit is enough.
   boolean anyPatternMatched = false;
   for (SemgrexPattern vpPattern : new RelationTripleSegmenter().VP_PATTERNS) {
     anyPatternMatched |= vpPattern.matcher(graph).matches();
   }
   assertTrue(anyPatternMatched);
 }
예제 #5
0
 /**
  * Parses a single relation child, optionally preceded by a modifier token, and returns the
  * resulting pattern.
  *
  * <p>Dispatches on the kind of the next token:
  *
  * <ul>
  *   <li>{@code RELATION}, {@code ALIGNRELN}, {@code IDENTIFIER} or token 19: the child is
  *       parsed by {@code RelChild()} and returned unmodified;
  *   <li>token 17: the token is consumed, the child is parsed with {@code underNegation} forced
  *       to {@code true}, and the result is negated;
  *   <li>token 18: the token is consumed, the child is parsed and made optional.
  * </ul>
  *
  * <p>NOTE(review): the {@code jj_*} helpers suggest this is parser-generator output; the
  * literal text of tokens 17, 18 and 19 is not visible here (17 is presumably the negation
  * marker and 18 the optional marker) — confirm against the grammar before editing by hand.
  *
  * @return the parsed (and possibly negated or optional) child pattern
  * @throws ParseException if the next token is none of the kinds listed above
  */
 public final SemgrexPattern ModRelation() throws ParseException {
   SemgrexPattern child;
   boolean startUnderNeg;
   // Use the cached next-token kind, or compute it when the cache holds -1.
   switch ((jj_ntk == -1) ? jj_ntk_f() : jj_ntk) {
     case RELATION:
     case ALIGNRELN:
     case IDENTIFIER:
     case 19:
       {
         // No modifier: parse the child as is.
         child = RelChild();
         break;
       }
     case 17:
       {
         jj_consume_token(17);
         // Remember the enclosing negation state so it can be restored once the child has
         // been parsed with underNegation switched on.
         startUnderNeg = underNegation;
         underNegation = true;
         child = RelChild();
         underNegation = startUnderNeg;
         child.negate();
         break;
       }
     case 18:
       {
         jj_consume_token(18);
         child = RelChild();
         child.makeOptional();
         break;
       }
     default:
       // Unexpected token: record the current generation in the lookahead table, then fail.
       jj_la1[8] = jj_gen;
       jj_consume_token(-1);
       throw new ParseException();
   }
   {
     // The condition is always true; wrapping the return in an `if` keeps the trailing throw
     // reachable as far as the compiler is concerned.
     if ("" != null) return child;
   }
   throw new Error("Missing return statement in function");
 }
예제 #6
0
  /**
   * Prints out all matches of a semgrex pattern on a file of dependencies. <br>
   * Usage:<br>
   * java edu.stanford.nlp.semgraph.semgrex.SemgrexPattern [args] <br>
   * See the help() function for a list of possible arguments to provide.
   *
   * <p>Graphs are collected from every tree file and every CoNLL-U file named on the command
   * line, and the pattern is then run against each graph. In {@code LIST} output format each
   * match and its named nodes are written to standard error. In {@code OFFSET} format one line
   * per matching graph is written to standard output, holding the line number annotation of
   * the graph's first vertex and the path of the file that graph was loaded from.
   *
   * <p>If no pattern is supplied, the help text is printed and the JVM exits with status 2.
   *
   * @param args command line flags and their values
   * @throws IOException propagated from reading the input files
   */
  public static void main(String[] args) throws IOException {
    Map<String, Integer> flagMap = Generics.newHashMap();

    flagMap.put(PATTERN, 1);
    flagMap.put(TREE_FILE, 1);
    flagMap.put(MODE, 1);
    flagMap.put(EXTRAS, 1);
    flagMap.put(CONLLU_FILE, 1);
    flagMap.put(OUTPUT_FORMAT_OPTION, 1);

    Map<String, String[]> argsMap = StringUtils.argsToMap(args, flagMap);

    // TODO: allow patterns to be extracted from a file
    if (!(argsMap.containsKey(PATTERN)) || argsMap.get(PATTERN).length == 0) {
      help();
      System.exit(2);
    }
    SemgrexPattern semgrex = SemgrexPattern.compile(argsMap.get(PATTERN)[0]);

    // Enum names are matched after upper-casing with Locale.ROOT so that the lookup does not
    // depend on the JVM's default locale.
    String modeString = DEFAULT_MODE;
    if (argsMap.containsKey(MODE) && argsMap.get(MODE).length > 0) {
      modeString = argsMap.get(MODE)[0].toUpperCase(java.util.Locale.ROOT);
    }
    SemanticGraphFactory.Mode mode = SemanticGraphFactory.Mode.valueOf(modeString);

    String outputFormatString = DEFAULT_OUTPUT_FORMAT;
    if (argsMap.containsKey(OUTPUT_FORMAT_OPTION) && argsMap.get(OUTPUT_FORMAT_OPTION).length > 0) {
      outputFormatString = argsMap.get(OUTPUT_FORMAT_OPTION)[0].toUpperCase(java.util.Locale.ROOT);
    }
    OutputFormat outputFormat = OutputFormat.valueOf(outputFormatString);

    boolean useExtras = true;
    if (argsMap.containsKey(EXTRAS) && argsMap.get(EXTRAS).length > 0) {
      useExtras = Boolean.parseBoolean(argsMap.get(EXTRAS)[0]);
    }

    List<SemanticGraph> graphs = Generics.newArrayList();
    // graphSources.get(i) is the path of the file that graphs.get(i) was loaded from, so the
    // OFFSET output can name the right file whatever the input kind or number of files.
    List<String> graphSources = Generics.newArrayList();

    // TODO: allow other sources of graphs, such as dependency files
    if (argsMap.containsKey(TREE_FILE) && argsMap.get(TREE_FILE).length > 0) {
      for (String treeFile : argsMap.get(TREE_FILE)) {
        System.err.println("Loading file " + treeFile);
        MemoryTreebank treebank = new MemoryTreebank(new TreeNormalizer());
        treebank.loadPath(treeFile);
        for (Tree tree : treebank) {
          // TODO: allow other languages... this defaults to English
          SemanticGraph graph =
              SemanticGraphFactory.makeFromTree(
                  tree,
                  mode,
                  useExtras
                      ? GrammaticalStructure.Extras.MAXIMAL
                      : GrammaticalStructure.Extras.NONE,
                  true);
          graphs.add(graph);
          graphSources.add(treeFile);
        }
      }
    }

    if (argsMap.containsKey(CONLLU_FILE) && argsMap.get(CONLLU_FILE).length > 0) {
      CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
      for (String conlluFile : argsMap.get(CONLLU_FILE)) {
        System.err.println("Loading file " + conlluFile);
        Iterator<SemanticGraph> it = reader.getIterator(IOUtils.readerFromString(conlluFile));

        while (it.hasNext()) {
          SemanticGraph graph = it.next();
          graphs.add(graph);
          graphSources.add(conlluFile);
        }
      }
    }

    for (int graphIndex = 0; graphIndex < graphs.size(); graphIndex++) {
      SemanticGraph graph = graphs.get(graphIndex);
      SemgrexMatcher matcher = semgrex.matcher(graph);
      if (!(matcher.find())) {
        continue;
      }

      if (outputFormat == OutputFormat.LIST) {
        System.err.println("Matched graph:");
        System.err.println(graph.toString(SemanticGraph.OutputFormat.LIST));
        boolean found = true;
        while (found) {
          System.err.println(
              "Matches at: " + matcher.getMatch().value() + "-" + matcher.getMatch().index());
          List<String> nodeNames = Generics.newArrayList();
          nodeNames.addAll(matcher.getNodeNames());
          Collections.sort(nodeNames);
          for (String name : nodeNames) {
            System.err.println(
                "  "
                    + name
                    + ": "
                    + matcher.getNode(name).value()
                    + "-"
                    + matcher.getNode(name).index());
          }
          System.err.println();
          found = matcher.find();
        }
      } else if (outputFormat == OutputFormat.OFFSET) {
        if (graph.vertexListSorted().isEmpty()) {
          continue;
        }
        // Report the file this particular graph came from. Always printing the first CoNLL-U
        // argument was wrong for multi-file runs and failed outright for tree-file-only runs.
        System.out.printf(
            "+%d %s%n",
            graph.vertexListSorted().get(0).get(CoreAnnotations.LineNumberAnnotation.class),
            graphSources.get(graphIndex));
      }
    }
  }
예제 #7
0
    /**
     * Handles a Semgrex request and writes the result back as JSON.
     *
     * <p>The query is run on {@code corenlpExecutor}: the document is obtained from the
     * request, annotated if it has no sentences yet, and the pattern from the {@code pattern}
     * URL parameter is compiled and run against each sentence's collapsed, CC-processed
     * dependency graph. The optional {@code filter} URL parameter (default {@code "false"};
     * a blank value counts as true) selects between two outputs under the {@code "sentences"}
     * key:
     *
     * <ul>
     *   <li>filter: one boolean per sentence, the result of {@code matches()};
     *   <li>find: one object per sentence with an entry per match (keyed by its running
     *       number, holding {@code text}, {@code begin}, {@code end} and one {@code $name}
     *       entry per named node) plus a {@code length} entry with the match count.
     * </ul>
     *
     * <p>The calling thread waits at most five seconds for the result. If the wait fails, an
     * error response is sent instead. An empty result (used by the worker after it has already
     * reported an error) produces no further response.
     *
     * @param httpExchange the exchange carrying the request and receiving the response
     * @throws IOException if sending the response fails
     */
    @Override
    public void handle(HttpExchange httpExchange) throws IOException {
      // Set common response headers
      httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

      Future<String> json =
          corenlpExecutor.submit(
              () -> {
                try {
                  // Get the document
                  Properties props = new Properties();
                  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse");
                  Annotation doc = getDocument(props, httpExchange);
                  if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
                    StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
                    pipeline.annotate(doc);
                  }

                  // Construct the matcher
                  Map<String, String> params = getURLParams(httpExchange.getRequestURI());
                  // (get the pattern)
                  if (!params.containsKey("pattern")) {
                    respondError("Missing required parameter 'pattern'", httpExchange);
                    return "";
                  }
                  String pattern = params.get("pattern");
                  // (get whether to filter / find)
                  String filterStr = params.getOrDefault("filter", "false");
                  final boolean filter =
                      filterStr.trim().isEmpty() || "true".equalsIgnoreCase(filterStr);
                  // (create the matcher)
                  final SemgrexPattern regex = SemgrexPattern.compile(pattern);

                  // Run Semgrex
                  return JSONOutputter.JSONWriter.objectToJSON(
                      (docWriter) -> {
                        if (filter) {
                          // Case: just filter sentences
                          docWriter.set(
                              "sentences",
                              doc.get(CoreAnnotations.SentencesAnnotation.class)
                                  .stream()
                                  .map(
                                      sentence ->
                                          regex
                                              .matcher(
                                                  sentence.get(
                                                      SemanticGraphCoreAnnotations
                                                          .CollapsedCCProcessedDependenciesAnnotation
                                                          .class))
                                              .matches())
                                  .collect(Collectors.toList()));
                        } else {
                          // Case: find matches
                          docWriter.set(
                              "sentences",
                              doc.get(CoreAnnotations.SentencesAnnotation.class)
                                  .stream()
                                  .map(
                                      sentence ->
                                          (Consumer<JSONOutputter.Writer>)
                                              (JSONOutputter.Writer sentWriter) -> {
                                                SemgrexMatcher matcher =
                                                    regex.matcher(
                                                        sentence.get(
                                                            SemanticGraphCoreAnnotations
                                                                .CollapsedCCProcessedDependenciesAnnotation
                                                                .class));
                                                int i = 0;
                                                while (matcher.find()) {
                                                  sentWriter.set(
                                                      Integer.toString(i),
                                                      (Consumer<JSONOutputter.Writer>)
                                                          (JSONOutputter.Writer matchWriter) -> {
                                                            IndexedWord match = matcher.getMatch();
                                                            matchWriter.set("text", match.word());
                                                            // begin/end are reported as
                                                            // index - 1 and index.
                                                            matchWriter.set(
                                                                "begin", match.index() - 1);
                                                            matchWriter.set("end", match.index());
                                                            for (String capture :
                                                                matcher.getNodeNames()) {
                                                              matchWriter.set(
                                                                  "$" + capture,
                                                                  (Consumer<JSONOutputter.Writer>)
                                                                      groupWriter -> {
                                                                        IndexedWord node =
                                                                            matcher.getNode(
                                                                                capture);
                                                                        groupWriter.set(
                                                                            "text", node.word());
                                                                        groupWriter.set(
                                                                            "begin",
                                                                            node.index() - 1);
                                                                        groupWriter.set(
                                                                            "end", node.index());
                                                                      });
                                                            }
                                                          });
                                                  i += 1;
                                                }
                                                sentWriter.set("length", i);
                                              }));
                        }
                      });
                } catch (Exception e) {
                  e.printStackTrace();
                  try {
                    respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
                  } catch (IOException ignored) {
                    // Best effort: the failure has already been printed above and there is
                    // nothing more to do if the error response cannot be delivered.
                  }
                }
                // An empty result tells the caller that no JSON body should be sent.
                return "";
              });

      // Send response
      byte[] response = new byte[0];
      try {
        // Encode explicitly as UTF-8 rather than with the platform default charset.
        response = json.get(5, TimeUnit.SECONDS).getBytes(java.nio.charset.StandardCharsets.UTF_8);
      } catch (InterruptedException e) {
        try {
          respondError("Timeout when executing Semgrex query", httpExchange);
        } finally {
          // Restore the interrupt flag so code further up the stack can see it.
          Thread.currentThread().interrupt();
        }
      } catch (ExecutionException | TimeoutException e) {
        respondError("Timeout when executing Semgrex query", httpExchange);
      }
      if (response.length > 0) {
        try {
          httpExchange.getResponseHeaders().add("Content-Type", "text/json");
          httpExchange
              .getResponseHeaders()
              .add("Content-Length", Integer.toString(response.length));
          httpExchange.sendResponseHeaders(HTTP_OK, response.length);
          httpExchange.getResponseBody().write(response);
        } finally {
          // Close the exchange even when sending the headers or the body fails.
          httpExchange.close();
        }
      }
    }
예제 #8
0
 /**
  * Parses a node expression reached through relation {@code r}, together with the relation
  * disjunction that may follow it, and returns the resulting pattern.
  *
  * <p>Dispatches on the kind of the next token:
  *
  * <ul>
  *   <li>token 13: a nested node expression enclosed between tokens 13 and 14 (presumably an
  *       opening and closing parenthesis — confirm against the grammar). If a relation
  *       disjunction follows, it is combined with the nested result's existing children in a
  *       new {@code CoordinationPattern}, which becomes the result's child;
  *   <li>token 17, 19 or 23: a node parsed by {@code ModNode(r)}. If a relation disjunction
  *       follows, it is set directly as the node's child.
  * </ul>
  *
  * <p>NOTE(review): the {@code jj_*} helpers suggest this is parser-generator output; the
  * literal text behind the numeric token kinds is not visible here.
  *
  * @param r the relation passed on to the nested {@code SubNode} / {@code ModNode} call
  * @return the parsed node pattern, with any trailing relations attached as its child
  * @throws ParseException if the next token is none of the kinds listed above
  */
 public final SemgrexPattern SubNode(GraphRelation r) throws ParseException {
   SemgrexPattern result = null;
   SemgrexPattern child = null;
   // Use the cached next-token kind, or compute it when the cache holds -1.
   switch ((jj_ntk == -1) ? jj_ntk_f() : jj_ntk) {
     case 13:
       {
         // Bracketed form: token 13, a nested node expression, token 14.
         jj_consume_token(13);
         result = SubNode(r);
         jj_consume_token(14);
         // Optional trailing relation disjunction.
         switch ((jj_ntk == -1) ? jj_ntk_f() : jj_ntk) {
           case RELATION:
           case ALIGNRELN:
           case IDENTIFIER:
           case 17:
           case 18:
           case 19:
             {
               child = RelationDisj();
               break;
             }
           default:
             // Nothing follows: only record the current generation in the lookahead table.
             jj_la1[2] = jj_gen;
             ;
         }
         if (child != null) {
           // Keep what the nested expression already had as children and add the new
           // relations next to them inside one coordination node.
           List<SemgrexPattern> newChildren = new ArrayList<SemgrexPattern>();
           newChildren.addAll(result.getChildren());
           newChildren.add(child);
           result.setChild(new CoordinationPattern(false, newChildren, true));
         }
         {
           // The condition is always true; the `if` wrapper keeps the following statements
           // reachable as far as the compiler is concerned.
           if ("" != null) return result;
         }
         break;
       }
     case 17:
     case 19:
     case 23:
       {
         // Plain node, possibly carrying its own modifier (handled inside ModNode).
         result = ModNode(r);
         // Optional trailing relation disjunction.
         switch ((jj_ntk == -1) ? jj_ntk_f() : jj_ntk) {
           case RELATION:
           case ALIGNRELN:
           case IDENTIFIER:
           case 17:
           case 18:
           case 19:
             {
               child = RelationDisj();
               break;
             }
           default:
             jj_la1[3] = jj_gen;
             ;
         }
         if (child != null) result.setChild(child);
         {
           if ("" != null) return result;
         }
         break;
       }
     default:
       // Unexpected token: record the current generation in the lookahead table, then fail.
       jj_la1[4] = jj_gen;
       jj_consume_token(-1);
       throw new ParseException();
   }
   throw new Error("Missing return statement in function");
 }