public static final String doCorefResolution(Annotation annotation) {
  Map<Integer, CorefChain> corefs = annotation.get(CorefChainAnnotation.class);
  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  List<String> resolved = new ArrayList<String>();
  for (CoreMap sentence : sentences) {
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    for (CoreLabel token : tokens) {
      Integer corefClustId = token.get(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
      CorefChain chain = corefs.get(corefClustId);
      if (chain == null) {
        resolved.add(token.word());
      } else {
        // Replace this mention's tokens with the chain's representative mention.
        CorefMention reprMent = chain.getRepresentativeMention();
        int sentIdx = reprMent.sentNum - 1;
        CoreMap corefSentence = sentences.get(sentIdx);
        List<CoreLabel> corefSentenceTokens = corefSentence.get(TokensAnnotation.class);
        if (token.index() < reprMent.startIndex || token.index() > reprMent.endIndex) {
          for (int i = reprMent.startIndex; i < reprMent.endIndex; i++) {
            CoreLabel matchedLabel = corefSentenceTokens.get(i - 1); // token indices are 1-based
            resolved.add(matchedLabel.word());
          }
        } else {
          // The token is part of the representative mention itself; keep it as-is.
          resolved.add(token.word());
        }
      }
    }
  }
  StringBuilder resolvedStr = new StringBuilder();
  for (String str : resolved) {
    resolvedStr.append(str).append(' ');
  }
  return resolvedStr.toString();
}
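// A minimal usage sketch, assuming a CoreNLP pipeline with coreference enabled
// (on older CoreNLP versions the final annotator is named "dcoref" rather than
// "coref"; the sample text is illustrative):
public static void main(String[] args) {
  Properties props = new Properties();
  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,coref");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  Annotation annotation = new Annotation("John drove to Judy's house. He made her dinner.");
  pipeline.annotate(annotation);
  // Prints the text with every mention replaced by its representative mention.
  System.out.println(doCorefResolution(annotation));
}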
/**
 * The basic method for splitting off a clause of a tree. This modifies the tree in place. This
 * method additionally follows ref edges.
 *
 * @param tree The tree to split a clause from.
 * @param toKeep The edge representing the clause to keep.
 */
@SuppressWarnings("unchecked")
private void simpleClause(SemanticGraph tree, SemanticGraphEdge toKeep) {
  splitToChildOfEdge(tree, toKeep);

  // Follow 'ref' edges
  Map<IndexedWord, IndexedWord> refReplaceMap = new HashMap<>();
  // (find replacements)
  for (IndexedWord vertex : tree.vertexSet()) {
    for (SemanticGraphEdge edge : extraEdgesByDependent.get(vertex)) {
      if ("ref".equals(edge.getRelation().toString()) // it's a ref edge...
          && !tree.containsVertex(edge.getGovernor())) { // ...whose governor isn't already in the tree.
        refReplaceMap.put(vertex, edge.getGovernor());
      }
    }
  }
  // (do replacements)
  for (Map.Entry<IndexedWord, IndexedWord> entry : refReplaceMap.entrySet()) {
    Iterator<SemanticGraphEdge> iter = tree.incomingEdgeIterator(entry.getKey());
    if (!iter.hasNext()) {
      continue;
    }
    SemanticGraphEdge incomingEdge = iter.next();
    IndexedWord governor = incomingEdge.getGovernor();
    tree.removeVertex(entry.getKey());
    addSubtree(
        tree,
        governor,
        incomingEdge.getRelation().toString(),
        this.tree,
        entry.getValue(),
        this.tree.incomingEdgeList(tree.getFirstRoot()));
  }
}
/**
 * Parse the parameters of a connection into a CoreNLP properties file that can be passed into
 * {@link StanfordCoreNLP}, and used in the I/O stages.
 *
 * @param httpExchange The http exchange; effectively, the request information.
 * @return A {@link Properties} object corresponding to a combination of default and passed
 *     properties.
 * @throws UnsupportedEncodingException Thrown if we could not decode the key/value pairs with
 *     UTF-8.
 */
private Properties getProperties(HttpExchange httpExchange) throws UnsupportedEncodingException {
  // Load the default properties
  Properties props = new Properties();
  defaultProps.forEach((key, value) -> props.setProperty(key.toString(), value.toString()));

  // Try to get more properties from the query string, under either accepted key.
  Map<String, String> urlParams = getURLParams(httpExchange.getRequestURI());
  String encodedProperties = null;
  if (urlParams.containsKey("properties")) {
    encodedProperties = urlParams.get("properties");
  } else if (urlParams.containsKey("props")) {
    encodedProperties = urlParams.get("props");
  }
  if (encodedProperties != null) {
    StringUtils.decodeMap(URLDecoder.decode(encodedProperties, "UTF-8"))
        .forEach(props::setProperty);
  }

  // Make sure the annotators list includes all of its prerequisites
  props.setProperty(
      "annotators",
      StanfordCoreNLP.ensurePrerequisiteAnnotators(
          props.getProperty("annotators").split("[, \t]+")));
  return props;
}
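// For illustration, a request might pass properties as a URL-encoded map such as
// (the exact map syntax accepted by StringUtils.decodeMap is assumed here):
//
//   GET /?properties=%7B%22annotators%22%3A%22tokenize%2Cssplit%2Cpos%22%7D
//
// which decodes to {"annotators": "tokenize,ssplit,pos"} and overrides the
// default annotator list before prerequisites are filled in.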
public Object intern(Object o) {
  Object i = oToO.get(o);
  if (i == null) {
    i = o;
    oToO.put(o, o);
  }
  return i;
}
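// A minimal usage sketch of the interning pattern above (the surrounding class
// and its oToO map are assumed from context; the strings are illustrative):
//
//   Object a = intern(new String("NP"));
//   Object b = intern(new String("NP"));
//   // a == b: equal objects collapse to one canonical instance, so identity
//   // comparison becomes safe and duplicate objects can be garbage-collected.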
public void addInPlace(SentenceKey key, SentenceStatistics sentenceStatistics) {
  for (Map<SentenceKey, EnsembleStatistics> impl : this.impl) {
    EnsembleStatistics stats = impl.get(key);
    if (stats == null) {
      stats = new EnsembleStatistics(new LinkedList<SentenceStatistics>());
      impl.put(key, stats);
    }
    stats.addInPlace(sentenceStatistics);
  }
}
private Counter<String> uniformRandom() {
  Counter<String> uniformRandom =
      new ClassicCounter<>(MapFactory.<String, MutableDouble>linkedHashMapFactory());
  for (Map<SentenceKey, EnsembleStatistics> impl : this.impl) {
    for (Map.Entry<SentenceKey, EnsembleStatistics> entry : impl.entrySet()) {
      uniformRandom.setCount(entry.getKey().sentenceHash, 1.0);
    }
  }
  return uniformRandom;
}
private Counter<String> highKLFromMean() {
  // Get the KL divergence of each sentence's distribution from the ensemble mean
  Counter<String> kl =
      new ClassicCounter<>(MapFactory.<String, MutableDouble>linkedHashMapFactory());
  for (Map<SentenceKey, EnsembleStatistics> impl : this.impl) {
    for (Map.Entry<SentenceKey, EnsembleStatistics> entry : impl.entrySet()) {
      kl.setCount(entry.getKey().sentenceHash, entry.getValue().averageKLFromMean());
    }
  }
  return kl;
}
protected String historyToString(List history) {
  // Cache the string form of each history, joining its elements with '^'.
  String str = (String) historyToString.get(history);
  if (str == null) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < history.size(); i++) {
      sb.append('^');
      sb.append(history.get(i));
    }
    str = sb.toString();
    historyToString.put(history, str);
  }
  return str;
}
private List<CoreMap> toCoreMaps(
    CoreMap annotation, List<TimeExpression> timeExpressions, SUTime.TimeIndex timeIndex) {
  if (timeExpressions == null) return null;
  List<CoreMap> coreMaps = new ArrayList<CoreMap>(timeExpressions.size());
  for (TimeExpression te : timeExpressions) {
    CoreMap cm = te.getAnnotation();
    SUTime.Temporal temporal = te.getTemporal();
    if (temporal != null) {
      String origText = annotation.get(CoreAnnotations.TextAnnotation.class);
      String text = cm.get(CoreAnnotations.TextAnnotation.class);
      if (origText != null) {
        // Make sure the text is from the original (and not from concatenated tokens)
        ChunkAnnotationUtils.annotateChunkText(cm, annotation);
        text = cm.get(CoreAnnotations.TextAnnotation.class);
      }
      Map<String, String> timexAttributes;
      try {
        timexAttributes = temporal.getTimexAttributes(timeIndex);
        if (options.includeRange) {
          SUTime.Temporal rangeTemporal = temporal.getRange();
          if (rangeTemporal != null) {
            timexAttributes.put("range", rangeTemporal.toString());
          }
        }
      } catch (Exception e) {
        logger.log(
            Level.WARNING, "Failed to get attributes from " + text + ", timeIndex " + timeIndex, e);
        continue;
      }
      Timex timex;
      try {
        timex = Timex.fromMap(text, timexAttributes);
      } catch (Exception e) {
        logger.log(
            Level.WARNING, "Failed to process " + text + " with attributes " + timexAttributes, e);
        continue;
      }
      if (timex != null) {
        cm.set(TimexAnnotation.class, timex);
        coreMaps.add(cm);
      } else {
        logger.warning("No timex expression for: " + text);
      }
    }
  }
  return coreMaps;
}
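// For illustration: the attribute map returned by temporal.getTimexAttributes
// generally follows TIMEX3 conventions, so for a phrase like "next Tuesday" it
// might look like {tid=t1, type=DATE, value=2024-01-09} before being turned
// into a Timex object (the exact keys and values here are assumptions and
// depend on the temporal expression and the document date).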
private Counter<String> lowAverageConfidence() {
  // Get confidences
  Counter<String> lowConfidence =
      new ClassicCounter<>(MapFactory.<String, MutableDouble>linkedHashMapFactory());
  for (Map<SentenceKey, EnsembleStatistics> impl : this.impl) {
    for (Map.Entry<SentenceKey, EnsembleStatistics> entry : impl.entrySet()) {
      SentenceStatistics average = entry.getValue().mean();
      for (double confidence : average.confidence) {
        // Note: setCount overwrites, so with multiple confidences the last one wins.
        lowConfidence.setCount(entry.getKey().sentenceHash, 1 - confidence);
      }
    }
  }
  return lowConfidence;
}
/** Run some sanity checks on the training statistics, to make sure they look valid. */
public void validate() {
  for (Map<SentenceKey, EnsembleStatistics> map : impl) {
    for (EnsembleStatistics stats : map.values()) {
      for (SentenceStatistics component : stats.statisticsForClassifiers) {
        assert !Counters.isUniformDistribution(component.relationDistribution, 1e-5);
        Counters.normalize(component.relationDistribution); // TODO(gabor) this shouldn't be necessary
        assert (Math.abs(component.relationDistribution.totalCount() - 1.0)) < 1e-5;
      }
      assert (Math.abs(stats.mean().relationDistribution.totalCount() - 1.0)) < 1e-5;
      assert !Counters.isUniformDistribution(stats.mean().relationDistribution, 1e-5);
    }
  }
}
@Override
public void handle(HttpExchange httpExchange) throws IOException {
  Map<String, String> urlParams = getURLParams(httpExchange.getRequestURI());
  httpExchange.getResponseHeaders().set("Content-Type", "text/plain");
  boolean doExit = false;
  String response = "Invalid shutdown key\n";
  if (urlParams.containsKey("key") && urlParams.get("key").equals(shutdownKey)) {
    response = "Shutdown successful!\n";
    doExit = true;
  }
  byte[] responseBytes = response.getBytes();
  httpExchange.sendResponseHeaders(HTTP_OK, responseBytes.length);
  httpExchange.getResponseBody().write(responseBytes);
  httpExchange.close();
  if (doExit) {
    System.exit(0);
  }
}
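// Usage sketch: the server can be stopped over HTTP once the caller knows the
// shutdown key (the endpoint path and port below are assumptions for the example):
//
//   curl "http://localhost:9000/shutdown?key=<shutdownKey>"
//
// Any other key returns "Invalid shutdown key" and leaves the server running.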
public XBarGrammarProjection(BinaryGrammar bg, UnaryGrammar ug) {
  Map<BinaryRule, BinaryRule> binaryRules = new HashMap<BinaryRule, BinaryRule>();
  Map<UnaryRule, UnaryRule> unaryRules = new HashMap<UnaryRule, UnaryRule>();
  sourceUG = ug;
  sourceBG = bg;
  sourceNumberer = Numberer.getGlobalNumberer(bg.stateSpace());
  targetNumberer = Numberer.getGlobalNumberer(bg.stateSpace() + "-xbar");
  projection = new int[sourceNumberer.total()];
  scanStates(sourceNumberer, targetNumberer);
  targetBG = new BinaryGrammar(targetNumberer.total(), bg.stateSpace() + "-xbar");
  targetUG = new UnaryGrammar(targetNumberer.total());
  // Project each binary rule, keeping only the highest-scoring projection of each rule.
  for (Iterator<BinaryRule> brI = bg.iterator(); brI.hasNext(); ) {
    BinaryRule rule = projectBinaryRule(brI.next());
    Rule old = binaryRules.get(rule);
    if (old == null || rule.score > old.score) {
      binaryRules.put(rule, rule);
    }
  }
  for (BinaryRule br : binaryRules.keySet()) {
    targetBG.addRule(br);
  }
  targetBG.splitRules();
  // Likewise for unary rules.
  for (int parent = 0; parent < sourceNumberer.total(); parent++) {
    for (Iterator<UnaryRule> urI = ug.ruleIteratorByParent(parent); urI.hasNext(); ) {
      UnaryRule sourceRule = urI.next();
      UnaryRule rule = projectUnaryRule(sourceRule);
      Rule old = unaryRules.get(rule);
      if (old == null || rule.score > old.score) {
        unaryRules.put(rule, rule);
      }
    }
  }
  for (UnaryRule ur : unaryRules.keySet()) {
    targetUG.addRule(ur);
  }
  targetUG.purgeRules();
  System.out.println(
      "Projected " + sourceNumberer.total() + " states to " + targetNumberer.total() + " states.");
}
/**
 * Parse the URL parameters into a map of (key, value) pairs.
 *
 * @param uri The URL that was requested.
 * @return A map of (key, value) pairs corresponding to the request parameters.
 * @throws UnsupportedEncodingException Thrown if we could not decode the URL with utf8.
 */
private static Map<String, String> getURLParams(URI uri) throws UnsupportedEncodingException {
  if (uri.getQuery() != null) {
    Map<String, String> urlParams = new HashMap<>();
    String query = uri.getQuery();
    // Protect escaped ampersands ("\&") from the split, then restore them per field.
    String[] queryFields = query.replace("\\&", "___AMP___").split("&");
    for (String queryField : queryFields) {
      queryField = queryField.replace("___AMP___", "&");
      int firstEq = queryField.indexOf('=');
      // Convention uses "+" for spaces.
      String key = URLDecoder.decode(queryField.substring(0, firstEq), "utf8");
      String value = URLDecoder.decode(queryField.substring(firstEq + 1), "utf8");
      urlParams.put(key, value);
    }
    return urlParams;
  } else {
    return Collections.emptyMap();
  }
}
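// For example, a query string of "pattern=%7Bword%3Afoo%7D&filter=true" parses
// to {pattern -> {word:foo}, filter -> true}: each field is split on the first
// '=' and both halves are URL-decoded, with '+' read as a space.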
public TrainingStatistics merge(TrainingStatistics other) {
  Map<SentenceKey, EnsembleStatistics> newStats = new HashMap<>();
  // Add elements from this statistics
  for (Map<SentenceKey, EnsembleStatistics> map : this.impl) {
    for (SentenceKey key : map.keySet()) {
      newStats.put(key, new EnsembleStatistics(map.get(key)));
    }
  }
  // Add elements from the other statistics
  for (Map<SentenceKey, EnsembleStatistics> map : other.impl) {
    for (SentenceKey key : map.keySet()) {
      EnsembleStatistics existing = newStats.get(key);
      if (existing == null) {
        existing = new EnsembleStatistics(new LinkedList<SentenceStatistics>());
        newStats.put(key, existing);
      }
      existing.addInPlace(map.get(key));
    }
  }
  return new TrainingStatistics(Maybe.Just(newStats));
}
public Object formResult() {
  Set brs = new HashSet();
  Set urs = new HashSet();
  // Scan each rule / history pair
  int ruleCount = 0;
  for (Iterator pairI = rulePairs.keySet().iterator(); pairI.hasNext(); ) {
    if (ruleCount % 100 == 0) {
      System.err.println("Rules multiplied: " + ruleCount);
    }
    ruleCount++;
    Pair rulePair = (Pair) pairI.next();
    Rule baseRule = (Rule) rulePair.first;
    String baseLabel = (String) ruleToLabel.get(baseRule);
    List history = (List) rulePair.second;
    double totalProb = 0;
    for (int depth = 1; depth <= HISTORY_DEPTH() && depth <= history.size(); depth++) {
      List subHistory = history.subList(0, depth);
      double c_label = labelPairs.getCount(new Pair(baseLabel, subHistory));
      double c_rule = rulePairs.getCount(new Pair(baseRule, subHistory));
      // Estimate P(rule | label, history) by relative frequency, weighting each
      // history depth uniformly at 1 / HISTORY_DEPTH().
      double prob = (1.0 / HISTORY_DEPTH()) * (c_rule) / (c_label);
      totalProb += prob;
      for (int childDepth = 0; childDepth <= Math.min(HISTORY_DEPTH() - 1, depth); childDepth++) {
        Rule rule = specifyRule(baseRule, subHistory, childDepth);
        rule.score = (float) Math.log(totalProb);
        if (rule instanceof UnaryRule) {
          urs.add(rule);
        } else {
          brs.add(rule);
        }
      }
    }
  }
  System.out.println("Total states: " + stateNumberer.total());
  BinaryGrammar bg = new BinaryGrammar(stateNumberer.total());
  UnaryGrammar ug = new UnaryGrammar(stateNumberer.total());
  for (Iterator brI = brs.iterator(); brI.hasNext(); ) {
    bg.addRule((BinaryRule) brI.next());
  }
  for (Iterator urI = urs.iterator(); urI.hasNext(); ) {
    ug.addRule((UnaryRule) urI.next());
  }
  return new Pair(ug, bg);
}
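// Worked example of the score above (the numbers are illustrative): with
// HISTORY_DEPTH() = 2, if a rule occurs c_rule = 30 times under a depth-1
// history in which its label occurs c_label = 100 times, that depth contributes
// (1/2) * 30/100 = 0.15 to totalProb; the rule's score is the log of the sum of
// such contributions across all depths considered so far.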
protected void tallyInternalNode(Tree lt, List parents) {
  // Form the base rule
  String label = lt.label().value();
  Rule baseR = ltToRule(lt);
  ruleToLabel.put(baseR, label);
  // Act on each history depth
  for (int depth = 0, maxDepth = Math.min(HISTORY_DEPTH(), parents.size());
      depth <= maxDepth;
      depth++) {
    List history = new ArrayList(parents.subList(0, depth));
    // Tally each history level / rewrite pair
    rulePairs.incrementCount(new Pair(baseR, history), 1);
    labelPairs.incrementCount(new Pair(label, history), 1);
  }
}
@Override
public void handle(HttpExchange httpExchange) throws IOException {
  // Set common response headers
  httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

  Future<String> json =
      corenlpExecutor.submit(
          () -> {
            try {
              // Get the document
              Properties props = new Properties();
              props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse");
              Annotation doc = getDocument(props, httpExchange);
              if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
                StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
                pipeline.annotate(doc);
              }

              // Construct the matcher
              Map<String, String> params = getURLParams(httpExchange.getRequestURI());
              // (get the pattern)
              if (!params.containsKey("pattern")) {
                respondError("Missing required parameter 'pattern'", httpExchange);
                return "";
              }
              String pattern = params.get("pattern");
              // (get whether to filter / find)
              String filterStr = params.getOrDefault("filter", "false");
              final boolean filter =
                  filterStr.trim().isEmpty() || "true".equalsIgnoreCase(filterStr);
              // (create the matcher)
              final SemgrexPattern regex = SemgrexPattern.compile(pattern);

              // Run Semgrex over each sentence's dependency graph
              return JSONOutputter.JSONWriter.objectToJSON(
                  (docWriter) -> {
                    if (filter) {
                      // Case: just report, per sentence, whether the pattern matches
                      docWriter.set(
                          "sentences",
                          doc.get(CoreAnnotations.SentencesAnnotation.class).stream()
                              .map(
                                  sentence ->
                                      regex
                                          .matcher(
                                              sentence.get(
                                                  SemanticGraphCoreAnnotations
                                                      .CollapsedCCProcessedDependenciesAnnotation
                                                      .class))
                                          .matches())
                              .collect(Collectors.toList()));
                    } else {
                      // Case: find matches, writing each match and its named captures
                      docWriter.set(
                          "sentences",
                          doc.get(CoreAnnotations.SentencesAnnotation.class).stream()
                              .map(
                                  sentence ->
                                      (Consumer<JSONOutputter.Writer>)
                                          (JSONOutputter.Writer sentWriter) -> {
                                            SemgrexMatcher matcher =
                                                regex.matcher(
                                                    sentence.get(
                                                        SemanticGraphCoreAnnotations
                                                            .CollapsedCCProcessedDependenciesAnnotation
                                                            .class));
                                            int i = 0;
                                            while (matcher.find()) {
                                              sentWriter.set(
                                                  Integer.toString(i),
                                                  (Consumer<JSONOutputter.Writer>)
                                                      (JSONOutputter.Writer matchWriter) -> {
                                                        IndexedWord match = matcher.getMatch();
                                                        matchWriter.set("text", match.word());
                                                        matchWriter.set("begin", match.index() - 1);
                                                        matchWriter.set("end", match.index());
                                                        for (String capture :
                                                            matcher.getNodeNames()) {
                                                          matchWriter.set(
                                                              "$" + capture,
                                                              (Consumer<JSONOutputter.Writer>)
                                                                  groupWriter -> {
                                                                    IndexedWord node =
                                                                        matcher.getNode(capture);
                                                                    groupWriter.set(
                                                                        "text", node.word());
                                                                    groupWriter.set(
                                                                        "begin", node.index() - 1);
                                                                    groupWriter.set(
                                                                        "end", node.index());
                                                                  });
                                                        }
                                                      });
                                              i += 1;
                                            }
                                            sentWriter.set("length", i);
                                          }));
                    }
                  });
            } catch (Exception e) {
              e.printStackTrace();
              try {
                respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
              } catch (IOException ignored) {
              }
            }
            return "";
          });

  // Send response
  byte[] response = new byte[0];
  try {
    response = json.get(5, TimeUnit.SECONDS).getBytes();
  } catch (InterruptedException | ExecutionException | TimeoutException e) {
    respondError("Timeout when executing Semgrex query", httpExchange);
  }
  if (response.length > 0) {
    httpExchange.getResponseHeaders().add("Content-Type", "text/json");
    httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
    httpExchange.sendResponseHeaders(HTTP_OK, response.length);
    httpExchange.getResponseBody().write(response);
    httpExchange.close();
  }
}
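// Usage sketch: this handler backs the server's Semgrex endpoint. The port and
// path below are assumptions for the example; the pattern is the URL-encoded
// form of "{} >nsubj {}" (match any governor that has a nominal subject):
//
//   curl --data "The fox jumped over the dog." \
//        "http://localhost:9000/semgrex?pattern=%7B%7D%20%3Ensubj%20%7B%7D"
//
// With filter=true the JSON contains one boolean per sentence; otherwise each
// sentence lists its matches, with "$name" entries for named capture nodes.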