/**
 * Parse the parameters of a connection into a CoreNLP properties file that can be passed into
 * {@link StanfordCoreNLP}, and used in the I/O stages.
 *
 * @param httpExchange The http exchange; effectively, the request information.
 * @return A {@link Properties} object corresponding to a combination of default and passed
 *     properties.
 * @throws UnsupportedEncodingException Thrown if we could not decode the key/value pairs with
 *     UTF-8.
 */
private Properties getProperties(HttpExchange httpExchange) throws UnsupportedEncodingException {
  // Load the default properties
  Properties props = new Properties();
  defaultProps.forEach((key, value) -> props.setProperty(key.toString(), value.toString()));

  // Try to get more properties from the query string, under either the
  // "properties" or the "props" key.
  Map<String, String> urlParams = getURLParams(httpExchange.getRequestURI());
  if (urlParams.containsKey("properties")) {
    StringUtils.decodeMap(URLDecoder.decode(urlParams.get("properties"), "UTF-8"))
        .forEach(props::setProperty);
  } else if (urlParams.containsKey("props")) {
    // Read the "props" parameter here, not "properties"; the original read the wrong
    // key in this branch, so "props" was silently ignored (and threw an NPE).
    StringUtils.decodeMap(URLDecoder.decode(urlParams.get("props"), "UTF-8"))
        .forEach(props::setProperty);
  }

  // Make sure the annotator list includes all prerequisite annotators
  props.setProperty(
      "annotators",
      StanfordCoreNLP.ensurePrerequisiteAnnotators(
          props.getProperty("annotators").split("[, \t]+")));
  return props;
}
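// A hedged client-side sketch of how a request reaches getProperties above: the
// "properties" query parameter carries a URL-encoded map literal, which the server
// decodes with URLDecoder and StringUtils.decodeMap. The port, path, and map syntax
// here are illustrative assumptions, not part of the method's contract.
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;

public class PropsQueryExample {
  public static void main(String[] args) throws Exception {
    String propsMap = URLEncoder.encode(
        "{\"annotators\": \"tokenize,ssplit,pos\"}", StandardCharsets.UTF_8.name());
    // Pass this URL to any HTTP client; getProperties merges the decoded map
    // over the server's defaults.
    System.out.println("http://localhost:9000/?properties=" + propsMap);
  }
}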
public XBarGrammarProjection(BinaryGrammar bg, UnaryGrammar ug) {
  Map<BinaryRule, BinaryRule> binaryRules = new HashMap<BinaryRule, BinaryRule>();
  Map<UnaryRule, UnaryRule> unaryRules = new HashMap<UnaryRule, UnaryRule>();
  sourceUG = ug;
  sourceBG = bg;
  sourceNumberer = Numberer.getGlobalNumberer(bg.stateSpace());
  targetNumberer = Numberer.getGlobalNumberer(bg.stateSpace() + "-xbar");
  projection = new int[sourceNumberer.total()];
  scanStates(sourceNumberer, targetNumberer);
  targetBG = new BinaryGrammar(targetNumberer.total(), bg.stateSpace() + "-xbar");
  targetUG = new UnaryGrammar(targetNumberer.total());
  // Project each binary rule; when several source rules collapse onto the same
  // target rule, keep the highest-scoring one.
  for (Iterator<BinaryRule> brI = bg.iterator(); brI.hasNext(); ) {
    BinaryRule rule = projectBinaryRule(brI.next());
    Rule old = binaryRules.get(rule);
    if (old == null || rule.score > old.score) {
      binaryRules.put(rule, rule);
    }
  }
  for (BinaryRule br : binaryRules.keySet()) {
    targetBG.addRule(br);
  }
  targetBG.splitRules();
  // Likewise for unary rules, iterating over the source rules by parent state.
  for (int parent = 0; parent < sourceNumberer.total(); parent++) {
    for (Iterator<UnaryRule> urI = ug.ruleIteratorByParent(parent); urI.hasNext(); ) {
      UnaryRule rule = projectUnaryRule(urI.next());
      Rule old = unaryRules.get(rule);
      if (old == null || rule.score > old.score) {
        unaryRules.put(rule, rule);
      }
    }
  }
  for (UnaryRule ur : unaryRules.keySet()) {
    targetUG.addRule(ur);
  }
  targetUG.purgeRules();
  System.out.println(
      "Projected " + sourceNumberer.total() + " states to " + targetNumberer.total() + " states.");
}
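// The constructor above leans on a "keep the best-scoring duplicate" idiom: rules
// that project to the same target compete, and the higher score wins. A
// self-contained sketch of that idiom with a toy rule type (all names hypothetical):
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

public class BestScoreDedup {
  static final class ToyRule {
    final String lhs, rhs;
    final float score;
    ToyRule(String lhs, String rhs, float score) {
      this.lhs = lhs; this.rhs = rhs; this.score = score;
    }
    // Equality ignores the score, so duplicates collide in the map.
    @Override public boolean equals(Object o) {
      return o instanceof ToyRule
          && lhs.equals(((ToyRule) o).lhs) && rhs.equals(((ToyRule) o).rhs);
    }
    @Override public int hashCode() { return Objects.hash(lhs, rhs); }
  }

  public static void main(String[] args) {
    Map<ToyRule, ToyRule> rules = new HashMap<>();
    for (ToyRule r : new ToyRule[] {
        new ToyRule("NP", "DT NN", -1.2f), new ToyRule("NP", "DT NN", -0.5f)}) {
      ToyRule old = rules.get(r);
      if (old == null || r.score > old.score) {
        rules.put(r, r);  // same LHS/RHS: the higher (log) score survives
      }
    }
    System.out.println(rules.get(new ToyRule("NP", "DT NN", 0f)).score);  // -0.5
  }
}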
public static String doCorefResolution(Annotation annotation) {
  Map<Integer, CorefChain> corefs = annotation.get(CorefChainAnnotation.class);
  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  List<String> resolved = new ArrayList<String>();
  for (CoreMap sentence : sentences) {
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    for (CoreLabel token : tokens) {
      Integer corefClustId = token.get(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
      CorefChain chain = corefs.get(corefClustId);
      if (chain == null) {
        // Token is not part of any coreference chain; keep it as-is.
        resolved.add(token.word());
      } else {
        // Replace the token with the chain's representative mention, unless the
        // token already lies inside that mention.
        CorefMention reprMent = chain.getRepresentativeMention();
        int sentIdx = reprMent.sentNum - 1;
        List<CoreLabel> corefSentenceTokens =
            sentences.get(sentIdx).get(CoreAnnotations.TokensAnnotation.class);
        if (token.index() < reprMent.startIndex || token.index() > reprMent.endIndex) {
          for (int i = reprMent.startIndex; i < reprMent.endIndex; i++) {
            // Mention indices are 1-based; the token list is 0-based.
            resolved.add(corefSentenceTokens.get(i - 1).word());
          }
        } else {
          resolved.add(token.word());
        }
      }
    }
  }
  StringBuilder resolvedStr = new StringBuilder();
  for (String str : resolved) {
    resolvedStr.append(str).append(' ');
  }
  System.out.println(resolvedStr);
  return resolvedStr.toString();
}
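// A hedged end-to-end sketch of feeding doCorefResolution above. The annotator
// list follows standard CoreNLP usage but exact requirements vary by version
// ("dcoref" on older releases), and the enclosing class holding the static method
// is assumed to be on the classpath.
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import java.util.Properties;

public class CorefExample {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,coref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation doc = new Annotation("John drove to the store. He bought milk.");
    pipeline.annotate(doc);
    // Expected output, roughly: "John drove to the store . John bought milk ."
    System.out.println(doCorefResolution(doc));
  }
}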
public Object intern(Object o) {
  // Return the canonical instance equal to o, registering o on first sight.
  Object i = oToO.get(o);
  if (i == null) {
    i = o;
    oToO.put(o, o);
  }
  return i;
}
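// A minimal self-contained version of the interner above (class name hypothetical;
// the original's oToO field becomes a private map). After interning, identity
// comparison (==) is valid for equal values, which saves memory and speeds lookups.
import java.util.HashMap;
import java.util.Map;

public class SimpleInterner {
  private final Map<Object, Object> oToO = new HashMap<>();

  public Object intern(Object o) {
    Object i = oToO.get(o);
    if (i == null) {
      i = o;
      oToO.put(o, o);
    }
    return i;
  }

  public static void main(String[] args) {
    SimpleInterner interner = new SimpleInterner();
    Object a = interner.intern(new String("NN"));
    Object b = interner.intern(new String("NN"));  // equal but distinct instance
    System.out.println(a == b);  // true: both refer to the canonical instance
  }
}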
public void addInPlace(SentenceKey key, SentenceStatistics sentenceStatistics) {
  // Fold the sentence statistics into every backing map, creating the ensemble
  // entry for this key on first use. (The loop variable is renamed from "impl"
  // so it no longer shadows the field it iterates over.)
  for (Map<SentenceKey, EnsembleStatistics> map : this.impl) {
    EnsembleStatistics stats = map.get(key);
    if (stats == null) {
      stats = new EnsembleStatistics(new LinkedList<SentenceStatistics>());
      map.put(key, stats);
    }
    stats.addInPlace(sentenceStatistics);
  }
}
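// The null-check-then-put pattern above is the classic get-or-create idiom; on
// Java 8+ it can be written with Map.computeIfAbsent. A toy sketch with stand-in
// types (names hypothetical):
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

public class GetOrCreateExample {
  public static void main(String[] args) {
    Map<String, List<Double>> statsByKey = new HashMap<>();
    // Create the entry on first use, then fold the new value in.
    statsByKey.computeIfAbsent("sentence-1", k -> new LinkedList<>()).add(0.75);
    statsByKey.computeIfAbsent("sentence-1", k -> new LinkedList<>()).add(0.25);
    System.out.println(statsByKey);  // {sentence-1=[0.75, 0.25]}
  }
}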
protected String historyToString(List history) {
  // Memoize the string form of each history, since histories repeat often.
  String str = (String) historyToString.get(history);
  if (str == null) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < history.size(); i++) {
      sb.append('^');
      sb.append(history.get(i));
    }
    str = sb.toString();
    historyToString.put(history, str);
  }
  return str;
}
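// A self-contained sketch of the same memoization, using generics instead of the
// raw List above (class and method names hypothetical). Note the cache is only
// safe if the history lists are not mutated after being used as keys.
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class HistoryCache {
  private final Map<List<String>, String> cache = new HashMap<>();

  public String historyToString(List<String> history) {
    return cache.computeIfAbsent(history, h -> {
      StringBuilder sb = new StringBuilder();
      for (String item : h) {
        sb.append('^').append(item);
      }
      return sb.toString();
    });
  }

  public static void main(String[] args) {
    HistoryCache c = new HistoryCache();
    System.out.println(c.historyToString(List.of("S", "NP")));  // ^S^NP
  }
}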
public TrainingStatistics merge(TrainingStatistics other) {
  Map<SentenceKey, EnsembleStatistics> newStats = new HashMap<>();
  // Copy the entries from this statistics object
  for (Map<SentenceKey, EnsembleStatistics> map : this.impl) {
    for (Map.Entry<SentenceKey, EnsembleStatistics> entry : map.entrySet()) {
      newStats.put(entry.getKey(), new EnsembleStatistics(entry.getValue()));
    }
  }
  // Fold in the entries from the other statistics object
  for (Map<SentenceKey, EnsembleStatistics> map : other.impl) {
    for (Map.Entry<SentenceKey, EnsembleStatistics> entry : map.entrySet()) {
      EnsembleStatistics existing = newStats.get(entry.getKey());
      if (existing == null) {
        existing = new EnsembleStatistics(new LinkedList<SentenceStatistics>());
        newStats.put(entry.getKey(), existing);
      }
      existing.addInPlace(entry.getValue());
    }
  }
  // Return
  return new TrainingStatistics(Maybe.Just(newStats));
}
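// The merge above follows a copy-then-fold pattern: deep-copy one side so the
// inputs stay untouched, then accumulate the other side into the copy. A toy
// sketch of the same pattern with plain lists (all names hypothetical):
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class MergeExample {
  static Map<String, List<Double>> merge(
      Map<String, List<Double>> a, Map<String, List<Double>> b) {
    Map<String, List<Double>> out = new HashMap<>();
    // Deep-copy a so later accumulation cannot mutate its lists.
    a.forEach((k, v) -> out.put(k, new ArrayList<>(v)));
    // Fold b in, creating entries on first sight.
    b.forEach((k, v) -> out.computeIfAbsent(k, x -> new ArrayList<>()).addAll(v));
    return out;
  }

  public static void main(String[] args) {
    Map<String, List<Double>> a = Map.of("s1", List.of(0.5));
    Map<String, List<Double>> b = Map.of("s1", List.of(0.9), "s2", List.of(0.1));
    System.out.println(merge(a, b));  // e.g. {s1=[0.5, 0.9], s2=[0.1]}
  }
}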
public Object formResult() {
  Set brs = new HashSet();
  Set urs = new HashSet();
  // Scan each rule / history pair
  int ruleCount = 0;
  for (Iterator pairI = rulePairs.keySet().iterator(); pairI.hasNext(); ) {
    if (ruleCount % 100 == 0) {
      System.err.println("Rules multiplied: " + ruleCount);
    }
    ruleCount++;
    Pair rulePair = (Pair) pairI.next();
    Rule baseRule = (Rule) rulePair.first;
    String baseLabel = (String) ruleToLabel.get(baseRule);
    List history = (List) rulePair.second;
    double totalProb = 0;
    for (int depth = 1; depth <= HISTORY_DEPTH() && depth <= history.size(); depth++) {
      List subHistory = history.subList(0, depth);
      double c_label = labelPairs.getCount(new Pair(baseLabel, subHistory));
      double c_rule = rulePairs.getCount(new Pair(baseRule, subHistory));
      // Uniformly mix the relative-frequency estimates across history depths.
      double prob = (1.0 / HISTORY_DEPTH()) * (c_rule) / (c_label);
      totalProb += prob;
      for (int childDepth = 0; childDepth <= Math.min(HISTORY_DEPTH() - 1, depth); childDepth++) {
        Rule rule = specifyRule(baseRule, subHistory, childDepth);
        rule.score = (float) Math.log(totalProb);
        if (rule instanceof UnaryRule) {
          urs.add(rule);
        } else {
          brs.add(rule);
        }
      }
    }
  }
  System.out.println("Total states: " + stateNumberer.total());
  BinaryGrammar bg = new BinaryGrammar(stateNumberer.total());
  UnaryGrammar ug = new UnaryGrammar(stateNumberer.total());
  for (Iterator brI = brs.iterator(); brI.hasNext(); ) {
    bg.addRule((BinaryRule) brI.next());
  }
  for (Iterator urI = urs.iterator(); urI.hasNext(); ) {
    ug.addRule((UnaryRule) urI.next());
  }
  return new Pair(ug, bg);
}
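// A worked sketch of the score interpolation above: with HISTORY_DEPTH() == 2, the
// rule probability is a uniform mixture of relative frequencies at depths 1 and 2,
// and the stored rule score is its log. The counts below are made up for illustration.
public class InterpolationExample {
  public static void main(String[] args) {
    int historyDepth = 2;
    double[] cRule = {30.0, 12.0};    // count(rule, history prefix of depth d+1)
    double[] cLabel = {100.0, 20.0};  // count(label, history prefix of depth d+1)
    double totalProb = 0.0;
    for (int d = 0; d < historyDepth; d++) {
      totalProb += (1.0 / historyDepth) * cRule[d] / cLabel[d];
    }
    // (1/2)(30/100) + (1/2)(12/20) = 0.15 + 0.30 = 0.45
    System.out.println(totalProb);            // 0.45
    System.out.println(Math.log(totalProb));  // the score stored on the rule
  }
}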
@Override
public void handle(HttpExchange httpExchange) throws IOException {
  Map<String, String> urlParams = getURLParams(httpExchange.getRequestURI());
  httpExchange.getResponseHeaders().set("Content-Type", "text/plain");
  boolean doExit = false;
  String response = "Invalid shutdown key\n";
  if (urlParams.containsKey("key") && urlParams.get("key").equals(shutdownKey)) {
    response = "Shutdown successful!\n";
    doExit = true;
  }
  // Write the response before exiting, so the client sees the acknowledgement.
  byte[] responseBytes = response.getBytes();
  httpExchange.sendResponseHeaders(HTTP_OK, responseBytes.length);
  httpExchange.getResponseBody().write(responseBytes);
  httpExchange.close();
  if (doExit) {
    System.exit(0);
  }
}
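// A hedged client sketch for the shutdown handler above. The "/shutdown" path and
// port are assumptions about how the handler is mounted; the handler itself only
// checks the "key" query parameter against its configured shutdownKey.
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;

public class ShutdownClient {
  public static void main(String[] args) throws IOException {
    URL url = new URL("http://localhost:9000/shutdown?key=" + args[0]);
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    // 200 with "Shutdown successful!" if the key matches, "Invalid shutdown key" otherwise.
    System.out.println(conn.getResponseCode());
    conn.disconnect();
  }
}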
@Override
public void handle(HttpExchange httpExchange) throws IOException {
  // Set common response headers
  httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

  Future<String> json =
      corenlpExecutor.submit(
          () -> {
            try {
              // Get the document, annotating it if it has not been annotated yet
              Properties props = new Properties();
              props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse");
              Annotation doc = getDocument(props, httpExchange);
              if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
                StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
                pipeline.annotate(doc);
              }

              // Construct the matcher
              Map<String, String> params = getURLParams(httpExchange.getRequestURI());
              // (get the pattern)
              if (!params.containsKey("pattern")) {
                respondError("Missing required parameter 'pattern'", httpExchange);
                return "";
              }
              String pattern = params.get("pattern");
              // (get whether to filter or find; an empty value means filter)
              String filterStr = params.getOrDefault("filter", "false");
              final boolean filter =
                  filterStr.trim().isEmpty() || "true".equalsIgnoreCase(filterStr.trim());
              // (create the matcher)
              final SemgrexPattern regex = SemgrexPattern.compile(pattern);

              // Run Semgrex
              return JSONOutputter.JSONWriter.objectToJSON(
                  docWriter -> {
                    if (filter) {
                      // Case: filter sentences; report only whether each sentence matches
                      docWriter.set(
                          "sentences",
                          doc.get(CoreAnnotations.SentencesAnnotation.class)
                              .stream()
                              .map(
                                  sentence ->
                                      regex
                                          .matcher(
                                              sentence.get(
                                                  SemanticGraphCoreAnnotations
                                                      .CollapsedCCProcessedDependenciesAnnotation
                                                      .class))
                                          .matches())
                              .collect(Collectors.toList()));
                    } else {
                      // Case: find matches; report each match and its named captures
                      docWriter.set(
                          "sentences",
                          doc.get(CoreAnnotations.SentencesAnnotation.class)
                              .stream()
                              .map(
                                  sentence ->
                                      (Consumer<JSONOutputter.Writer>)
                                          sentWriter -> {
                                            SemgrexMatcher matcher =
                                                regex.matcher(
                                                    sentence.get(
                                                        SemanticGraphCoreAnnotations
                                                            .CollapsedCCProcessedDependenciesAnnotation
                                                            .class));
                                            int i = 0;
                                            while (matcher.find()) {
                                              sentWriter.set(
                                                  Integer.toString(i),
                                                  (Consumer<JSONOutputter.Writer>)
                                                      matchWriter -> {
                                                        IndexedWord match = matcher.getMatch();
                                                        matchWriter.set("text", match.word());
                                                        matchWriter.set("begin", match.index() - 1);
                                                        matchWriter.set("end", match.index());
                                                        for (String capture :
                                                            matcher.getNodeNames()) {
                                                          matchWriter.set(
                                                              "$" + capture,
                                                              (Consumer<JSONOutputter.Writer>)
                                                                  groupWriter -> {
                                                                    IndexedWord node =
                                                                        matcher.getNode(capture);
                                                                    groupWriter.set(
                                                                        "text", node.word());
                                                                    groupWriter.set(
                                                                        "begin", node.index() - 1);
                                                                    groupWriter.set(
                                                                        "end", node.index());
                                                                  });
                                                        }
                                                      });
                                              i += 1;
                                            }
                                            sentWriter.set("length", i);
                                          }));
                    }
                  });
            } catch (Exception e) {
              e.printStackTrace();
              try {
                respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
              } catch (IOException ignored) {
              }
            }
            return "";
          });

  // Send the response, or an error if the query timed out
  byte[] response = new byte[0];
  try {
    response = json.get(5, TimeUnit.SECONDS).getBytes();
  } catch (InterruptedException | ExecutionException | TimeoutException e) {
    respondError("Timeout when executing Semgrex query", httpExchange);
  }
  if (response.length > 0) {
    httpExchange.getResponseHeaders().add("Content-Type", "text/json");
    httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
    httpExchange.sendResponseHeaders(HTTP_OK, response.length);
    httpExchange.getResponseBody().write(response);
    httpExchange.close();
  }
}
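// A hedged client sketch for the Semgrex handler above: send the text in the
// request body and the URL-encoded pattern as a query parameter. The "/semgrex"
// path and port are assumptions about how the handler is mounted.
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;

public class SemgrexClient {
  public static void main(String[] args) throws Exception {
    // Match verbs that govern a nominal subject.
    String pattern = URLEncoder.encode(
        "{pos:/VB.*/} >nsubj {}", StandardCharsets.UTF_8.name());
    URL url = new URL("http://localhost:9000/semgrex?pattern=" + pattern);
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestMethod("POST");
    conn.setDoOutput(true);
    try (OutputStream os = conn.getOutputStream()) {
      os.write("The dog ate the apple.".getBytes(StandardCharsets.UTF_8));
    }
    System.out.println(conn.getResponseCode());  // 200 with a JSON body of matches
  }
}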