/**
 * Writes this pattern and, recursively, all of its children to {@code pw},
 * one pattern per line.
 *
 * @param pw the writer that receives the output
 * @param indent number of leading spaces for this pattern's line; each level
 *     of children is printed with one more space
 */
private void prettyPrint(PrintWriter pw, int indent) {
  // Emit the left padding one space at a time.
  for (int remaining = indent; remaining > 0; remaining--) {
    pw.print(" ");
  }
  pw.println(localString());

  final int childIndent = indent + 1;
  for (SemgrexPattern sub : getChildren()) {
    sub.prettyPrint(pw, childIndent);
  }
}
/**
 * Parses a semgrex expression into a {@link SemgrexPattern}.
 *
 * @param semgrex the pattern string to parse
 * @param env the environment that is stored on the returned pattern
 * @return the parsed pattern, with its environment and original pattern
 *     string recorded on it
 * @throws SemgrexParseException if the parser or its token manager rejects
 *     the pattern string; the underlying failure is attached as the cause
 */
public static SemgrexPattern compile(String semgrex, Env env) {
  try {
    // The parser is fed the pattern followed by a newline.
    StringReader input = new StringReader(semgrex + "\n");
    SemgrexPattern compiled = new SemgrexParser(input).Root();
    compiled.env = env;
    compiled.patternString = semgrex;
    return compiled;
  } catch (ParseException | TokenMgrError failure) {
    throw new SemgrexParseException("Error parsing semgrex pattern " + semgrex, failure);
  }
}
/**
 * Renders this node pattern as text.
 *
 * <p>The output is built in this order: an optional {@code !} when the
 * pattern is negated, an optional {@code ?} when it is optional, a single
 * space, the relation (if any) followed by a space, the node description
 * (prefixed with {@code !} when the description itself is negated), an
 * optional {@code =name} suffix, and finally the child pattern.
 *
 * @param hasPrecedence when {@code false} and a child is printed, the node
 *     description and its child are wrapped in parentheses
 * @param addChild whether the child pattern (if there is one) is included
 * @return the textual form of this pattern
 */
public String toString(boolean hasPrecedence, boolean addChild) {
  StringBuilder out = new StringBuilder();

  // Modifier prefixes, then the mandatory separating space.
  if (isNegated()) {
    out.append('!');
  }
  if (isOptional()) {
    out.append('?');
  }
  out.append(' ');

  if (reln != null) {
    out.append(reln.toString()).append(' ');
  }

  final boolean showChild = addChild && child != null;
  final boolean parenthesize = showChild && !hasPrecedence;

  if (parenthesize) {
    out.append('(');
  }
  if (negDesc) {
    out.append('!');
  }
  out.append(descString);
  if (name != null) {
    out.append('=').append(name);
  }

  if (showChild) {
    out.append(' ').append(child.toString(false));
    if (parenthesize) {
      out.append(')');
    }
  }
  return out.toString();
}
public void testVPOnlyReplacedWith() { String conll = "1\treplaced\t0\tconj:and\tVBD\n" + "2\twith\t5\tcase\tIN\n" + "3\ta\t5\tdet\tDT\n" + "4\tdifferent\t5\tamod\tJJ\n" + "5\ttype\t1\tnmod:with\tNN\n" + "6\tof\t7\tcase\tIN\n" + "7\tfilter\t5\tnmod:of\tNN\n"; // Positive case boolean matches = false; SemanticGraph tree = mkTree(conll).first; for (SemgrexPattern candidate : new RelationTripleSegmenter().VP_PATTERNS) { if (candidate.matcher(tree).matches()) { matches = true; } } assertTrue(matches); }
/**
 * Parses a single relation child, optionally preceded by a modifier token,
 * and returns the resulting pattern.
 *
 * <p>NOTE(review): the {@code jj_*} helpers suggest this is JavaCC-generated
 * parser code; if so, changes belong in the grammar file rather than here -
 * confirm before editing by hand.
 *
 * @return the pattern produced by {@code RelChild()}, negated when it was
 *     preceded by token kind 17, or made optional when preceded by kind 18
 * @throws ParseException if the next token cannot start this production
 */
public final SemgrexPattern ModRelation() throws ParseException {
  SemgrexPattern child;
  boolean startUnderNeg;
  // Dispatch on the kind of the next token, computing it first if it has not
  // been cached yet (jj_ntk == -1).
  switch ((jj_ntk == -1) ? jj_ntk_f() : jj_ntk) {
    case RELATION:
    case ALIGNRELN:
    case IDENTIFIER:
    case 19: {
      // No modifier: parse the relation child as-is.
      child = RelChild();
      break;
    }
    case 17: {
      // Token kind 17 (presumably the "!" literal - verify against the
      // grammar) marks a negated relation.
      jj_consume_token(17);
      // Parse the child with underNegation forced on, then restore the
      // value it had on entry.
      startUnderNeg = underNegation;
      underNegation = true;
      child = RelChild();
      underNegation = startUnderNeg;
      child.negate();
      break;
    }
    case 18: {
      // Token kind 18 (presumably the "?" literal - verify against the
      // grammar) marks an optional relation.
      jj_consume_token(18);
      child = RelChild();
      child.makeOptional();
      break;
    }
    default:
      // Unexpected token: record the current generation in lookahead slot 8
      // and fail. jj_consume_token(-1) presumably raises the detailed parse
      // error itself, leaving the explicit throw as a fallback - verify.
      jj_la1[8] = jj_gen;
      jj_consume_token(-1);
      throw new ParseException();
  }
  // The always-true condition keeps the trailing throw reachable as far as
  // the compiler is concerned; the return is always taken.
  { if ("" != null) return child; }
  throw new Error("Missing return statement in function");
}
/** * Prints out all matches of a semgrex pattern on a file of dependencies. <br> * Usage:<br> * java edu.stanford.nlp.semgraph.semgrex.SemgrexPattern [args] <br> * See the help() function for a list of possible arguments to provide. */ public static void main(String[] args) throws IOException { Map<String, Integer> flagMap = Generics.newHashMap(); flagMap.put(PATTERN, 1); flagMap.put(TREE_FILE, 1); flagMap.put(MODE, 1); flagMap.put(EXTRAS, 1); flagMap.put(CONLLU_FILE, 1); flagMap.put(OUTPUT_FORMAT_OPTION, 1); Map<String, String[]> argsMap = StringUtils.argsToMap(args, flagMap); args = argsMap.get(null); // TODO: allow patterns to be extracted from a file if (!(argsMap.containsKey(PATTERN)) || argsMap.get(PATTERN).length == 0) { help(); System.exit(2); } SemgrexPattern semgrex = SemgrexPattern.compile(argsMap.get(PATTERN)[0]); String modeString = DEFAULT_MODE; if (argsMap.containsKey(MODE) && argsMap.get(MODE).length > 0) { modeString = argsMap.get(MODE)[0].toUpperCase(); } SemanticGraphFactory.Mode mode = SemanticGraphFactory.Mode.valueOf(modeString); String outputFormatString = DEFAULT_OUTPUT_FORMAT; if (argsMap.containsKey(OUTPUT_FORMAT_OPTION) && argsMap.get(OUTPUT_FORMAT_OPTION).length > 0) { outputFormatString = argsMap.get(OUTPUT_FORMAT_OPTION)[0].toUpperCase(); } OutputFormat outputFormat = OutputFormat.valueOf(outputFormatString); boolean useExtras = true; if (argsMap.containsKey(EXTRAS) && argsMap.get(EXTRAS).length > 0) { useExtras = Boolean.valueOf(argsMap.get(EXTRAS)[0]); } List<SemanticGraph> graphs = Generics.newArrayList(); // TODO: allow other sources of graphs, such as dependency files if (argsMap.containsKey(TREE_FILE) && argsMap.get(TREE_FILE).length > 0) { for (String treeFile : argsMap.get(TREE_FILE)) { System.err.println("Loading file " + treeFile); MemoryTreebank treebank = new MemoryTreebank(new TreeNormalizer()); treebank.loadPath(treeFile); for (Tree tree : treebank) { // TODO: allow other languages... 
this defaults to English SemanticGraph graph = SemanticGraphFactory.makeFromTree( tree, mode, useExtras ? GrammaticalStructure.Extras.MAXIMAL : GrammaticalStructure.Extras.NONE, true); graphs.add(graph); } } } if (argsMap.containsKey(CONLLU_FILE) && argsMap.get(CONLLU_FILE).length > 0) { CoNLLUDocumentReader reader = new CoNLLUDocumentReader(); for (String conlluFile : argsMap.get(CONLLU_FILE)) { System.err.println("Loading file " + conlluFile); Iterator<SemanticGraph> it = reader.getIterator(IOUtils.readerFromString(conlluFile)); while (it.hasNext()) { SemanticGraph graph = it.next(); graphs.add(graph); } } } for (SemanticGraph graph : graphs) { SemgrexMatcher matcher = semgrex.matcher(graph); if (!(matcher.find())) { continue; } if (outputFormat == OutputFormat.LIST) { System.err.println("Matched graph:"); System.err.println(graph.toString(SemanticGraph.OutputFormat.LIST)); boolean found = true; while (found) { System.err.println( "Matches at: " + matcher.getMatch().value() + "-" + matcher.getMatch().index()); List<String> nodeNames = Generics.newArrayList(); nodeNames.addAll(matcher.getNodeNames()); Collections.sort(nodeNames); for (String name : nodeNames) { System.err.println( " " + name + ": " + matcher.getNode(name).value() + "-" + matcher.getNode(name).index()); } System.err.println(); found = matcher.find(); } } else if (outputFormat == OutputFormat.OFFSET) { if (graph.vertexListSorted().isEmpty()) { continue; } System.out.printf( "+%d %s%n", graph.vertexListSorted().get(0).get(CoreAnnotations.LineNumberAnnotation.class), argsMap.get(CONLLU_FILE)[0]); } } }
@Override public void handle(HttpExchange httpExchange) throws IOException { // Set common response headers httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*"); Future<String> json = corenlpExecutor.submit( () -> { try { // Get the document Properties props = new Properties() { { setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse"); } }; Annotation doc = getDocument(props, httpExchange); if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) { StanfordCoreNLP pipeline = mkStanfordCoreNLP(props); pipeline.annotate(doc); } // Construct the matcher Map<String, String> params = getURLParams(httpExchange.getRequestURI()); // (get the pattern) if (!params.containsKey("pattern")) { respondError("Missing required parameter 'pattern'", httpExchange); return ""; } String pattern = params.get("pattern"); // (get whether to filter / find) String filterStr = params.getOrDefault("filter", "false"); final boolean filter = filterStr.trim().isEmpty() || "true".equalsIgnoreCase(filterStr.toLowerCase()); // (create the matcher) final SemgrexPattern regex = SemgrexPattern.compile(pattern); // Run TokensRegex return JSONOutputter.JSONWriter.objectToJSON( (docWriter) -> { if (filter) { // Case: just filter sentences docWriter.set( "sentences", doc.get(CoreAnnotations.SentencesAnnotation.class) .stream() .map( sentence -> regex .matcher( sentence.get( SemanticGraphCoreAnnotations .CollapsedCCProcessedDependenciesAnnotation .class)) .matches()) .collect(Collectors.toList())); } else { // Case: find matches docWriter.set( "sentences", doc.get(CoreAnnotations.SentencesAnnotation.class) .stream() .map( sentence -> (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer sentWriter) -> { SemgrexMatcher matcher = regex.matcher( sentence.get( SemanticGraphCoreAnnotations .CollapsedCCProcessedDependenciesAnnotation .class)); int i = 0; while (matcher.find()) { sentWriter.set( Integer.toString(i), (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer 
matchWriter) -> { IndexedWord match = matcher.getMatch(); matchWriter.set("text", match.word()); matchWriter.set( "begin", match.index() - 1); matchWriter.set("end", match.index()); for (String capture : matcher.getNodeNames()) { matchWriter.set( "$" + capture, (Consumer<JSONOutputter.Writer>) groupWriter -> { IndexedWord node = matcher.getNode( capture); groupWriter.set( "text", node.word()); groupWriter.set( "begin", node.index() - 1); groupWriter.set( "end", node.index()); }); } }); i += 1; } sentWriter.set("length", i); })); } }); } catch (Exception e) { e.printStackTrace(); try { respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange); } catch (IOException ignored) { } } return ""; }); // Send response byte[] response = new byte[0]; try { response = json.get(5, TimeUnit.SECONDS).getBytes(); } catch (InterruptedException | ExecutionException | TimeoutException e) { respondError("Timeout when executing Semgrex query", httpExchange); } if (response.length > 0) { httpExchange.getResponseHeaders().add("Content-Type", "text/json"); httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length)); httpExchange.sendResponseHeaders(HTTP_OK, response.length); httpExchange.getResponseBody().write(response); httpExchange.close(); } }
/**
 * Parses a node that may be wrapped in a pair of delimiter tokens (kinds 13
 * and 14) and may be followed by a disjunction of relations, which is then
 * attached to the node as its child.
 *
 * <p>NOTE(review): the {@code jj_*} helpers suggest this is JavaCC-generated
 * parser code; if so, changes belong in the grammar file rather than here -
 * confirm before editing by hand.
 *
 * @param r the relation passed through to the nested {@code SubNode} /
 *     {@code ModNode} call
 * @return the parsed node pattern, with any trailing relations attached
 * @throws ParseException if the next token cannot start this production
 */
public final SemgrexPattern SubNode(GraphRelation r) throws ParseException {
  SemgrexPattern result = null;
  SemgrexPattern child = null;
  // Dispatch on the kind of the next token, computing it first if it has not
  // been cached yet (jj_ntk == -1).
  switch ((jj_ntk == -1) ? jj_ntk_f() : jj_ntk) {
    case 13: {
      // Delimited form: token 13, a nested SubNode, then token 14
      // (presumably "(" and ")" - verify against the grammar).
      jj_consume_token(13);
      result = SubNode(r);
      jj_consume_token(14);
      // Optional trailing relations: only parsed when the next token can
      // start one; otherwise just record the lookahead generation.
      switch ((jj_ntk == -1) ? jj_ntk_f() : jj_ntk) {
        case RELATION:
        case ALIGNRELN:
        case IDENTIFIER:
        case 17:
        case 18:
        case 19: {
          child = RelationDisj();
          break;
        }
        default:
          jj_la1[2] = jj_gen;
          ;
      }
      if (child != null) {
        // The nested node may already have children of its own, so the new
        // relations are appended to them and the combined list replaces the
        // node's child as a single CoordinationPattern.
        // NOTE(review): the meaning of the (false, ..., true) flags is not
        // visible here - check the CoordinationPattern constructor.
        List<SemgrexPattern> newChildren = new ArrayList<SemgrexPattern>();
        newChildren.addAll(result.getChildren());
        newChildren.add(child);
        result.setChild(new CoordinationPattern(false, newChildren, true));
      }
      // The always-true condition keeps the following statements reachable
      // as far as the compiler is concerned; the return is always taken.
      { if ("" != null) return result; }
      break;
    }
    case 17:
    case 19:
    case 23: {
      // Undelimited form: a (possibly modified) node, optionally followed by
      // relations that become its child directly.
      result = ModNode(r);
      switch ((jj_ntk == -1) ? jj_ntk_f() : jj_ntk) {
        case RELATION:
        case ALIGNRELN:
        case IDENTIFIER:
        case 17:
        case 18:
        case 19: {
          child = RelationDisj();
          break;
        }
        default:
          jj_la1[3] = jj_gen;
          ;
      }
      if (child != null) result.setChild(child);
      { if ("" != null) return result; }
      break;
    }
    default:
      // Unexpected token: record the current generation in lookahead slot 4
      // and fail. jj_consume_token(-1) presumably raises the detailed parse
      // error itself, leaving the explicit throw as a fallback - verify.
      jj_la1[4] = jj_gen;
      jj_consume_token(-1);
      throw new ParseException();
  }
  throw new Error("Missing return statement in function");
}