/**
 * Serializes one constituent into the given JSON object.
 *
 * <p>Fields are written in this order: {@code "label"}, {@code "score"} (only when the
 * constituent score is non-zero), {@code "start"}, {@code "end"}, the constituent's attributes,
 * and finally the label-to-score map when the constituent has one.
 *
 * @param c the constituent to serialize
 * @param cJ the JSON object that receives the constituent's fields
 */
private static void writeConstituent(Constituent c, JsonObject cJ) {
    writeString("label", c.getLabel(), cJ);

    // A score of exactly zero is left out of the output.
    double score = c.getConstituentScore();
    if (score != 0) {
        writeDouble("score", score, cJ);
    }

    writeInt("start", c.getStartSpan(), cJ);
    writeInt("end", c.getEndSpan(), cJ);
    writeAttributes(c, cJ);

    // The label-to-score map is optional; skip it when absent.
    Map<String, Double> scoresByLabel = c.getLabelsToScores();
    if (scoresByLabel != null) {
        writeLabelsToScores(scoresByLabel, cJ);
    }
}
/**
 * Renders the view's constituents inline in the annotated text using bracket notation.
 *
 * <p>Constituents are processed in the order given by
 * {@code TextAnnotationUtilities.constituentStartComparator}. For each one, the raw text between
 * the current cursor and the constituent's start character offset is copied, followed by
 * {@code "[" + label + " " + tokenizedSurfaceForm + " ] "}.
 *
 * <p>A constituent that starts before the cursor (it overlaps or is nested in an earlier one) is
 * still emitted in brackets, but no gap text is copied for it and the cursor never moves
 * backwards. Previously this case made {@code substring} throw.
 *
 * <p>NOTE: text that follows the end offset of the last constituent is not appended; this
 * long-standing output format is kept unchanged.
 *
 * @return the bracketed string; empty when the view has no constituents
 */
protected String getNERString() {
    List<Constituent> constituents = new ArrayList<>(view.getConstituents());
    Collections.sort(constituents, TextAnnotationUtilities.constituentStartComparator);

    String text = textAnnotation.getText();
    StringBuilder sb = new StringBuilder();
    int where = 0; // character offset up to which the text has been consumed

    for (Constituent c : constituents) {
        int start = c.getStartCharOffset();
        // Copy the untagged gap only when there is one; start < where means this constituent
        // overlaps an earlier one and substring(where, start) would throw.
        if (start > where) {
            sb.append(text.substring(where, start));
        }
        sb.append("[")
                .append(c.getLabel())
                .append(" ")
                .append(c.getTokenizedSurfaceForm())
                .append(" ] ");
        // Keep the cursor monotonic so a nested constituent cannot rewind it and cause
        // already-emitted text to be copied again.
        where = Math.max(where, c.getEndCharOffset());
    }
    return sb.toString();
}
public List<Constituent> getConnectedNPs(Problem prob) { List<Constituent> npList = new ArrayList<>(); List<Constituent> npListQuantRemoved = new ArrayList<>(); boolean onlyQuantityInSentence = true; int sentId = prob.ta.getSentenceFromToken(quantPhrase.getStartSpan()).getSentenceId(); for (QuantSpan qs : prob.quantities) { int tokenId = prob.ta.getTokenIdFromCharacterOffset(qs.start); if (prob.ta.getSentenceFromToken(tokenId).getSentenceId() == sentId && !(quantPhrase.getStartSpan() <= tokenId && quantPhrase.getEndSpan() > tokenId)) { onlyQuantityInSentence = false; break; } } // Find NPs from children of verb if (verbPhrase != null) { List<Relation> relations = verbPhrase.getOutgoingRelations(); for (Relation relation : relations) { if (!relation.getRelationName().equals("nsubj")) continue; Constituent dst = relation.getTarget(); for (Constituent cons : prob.chunks) { if (cons.getStartSpan() <= dst.getStartSpan() && cons.getEndSpan() > dst.getStartSpan() && cons.getLabel().equals("NP") && !npList.contains(cons)) { npList.add(cons); subject = cons; break; } } } } // Find NPs from PP NP connection int quantPhraseId = getChunkIndex(prob, quantPhrase.getStartSpan()); if (quantPhraseId + 2 < prob.chunks.size() && !prob.chunks.get(quantPhraseId + 1).getSurfaceForm().trim().equals("of") && prob.chunks.get(quantPhraseId + 1).getLabel().equals("PP") && prob.chunks.get(quantPhraseId + 2).getLabel().equals("NP") && !npList.contains(prob.chunks.get(quantPhraseId + 2))) { npList.add(prob.chunks.get(quantPhraseId + 2)); } if (quantPhraseId - 2 >= 0 && prob.chunks.get(quantPhraseId - 1).getLabel().equals("PP") && prob.chunks.get(quantPhraseId - 2).getLabel().equals("NP") && !npList.contains(prob.chunks.get(quantPhraseId - 2))) { npList.add(prob.chunks.get(quantPhraseId - 2)); } // Get preceding NP if (quantPhraseId - 1 >= 0 && prob.chunks.get(quantPhraseId - 1).getLabel().equals("NP") && !prob.posTags .get(prob.chunks.get(quantPhraseId - 1).getEndSpan()) .getLabel() 
.equals("CC") && !npList.contains(prob.chunks.get(quantPhraseId - 1))) { npList.add(prob.chunks.get(quantPhraseId - 1)); } // Get succeeding NP if (quantPhraseId + 1 < prob.chunks.size() && prob.chunks.get(quantPhraseId + 1).getLabel().equals("NP") && !prob.posTags.get(prob.chunks.get(quantPhraseId).getEndSpan()).getLabel().equals("CC") && !npList.contains(prob.chunks.get(quantPhraseId + 1))) { npList.add(prob.chunks.get(quantPhraseId + 1)); } // If only quantity in sentence, all NPs are connected if (onlyQuantityInSentence) { for (int i = 0; i < prob.chunks.size(); ++i) { Constituent cons = prob.chunks.get(i); if (cons.getSentenceId() == sentId && (i > quantPhraseId + 2 || i < quantPhraseId - 2) && !npList.contains(cons) && cons.getLabel().equals("NP")) { npList.add(cons); } } } // Remove quantity phrases from npList for (Constituent cons : npList) { boolean allow = true; for (QuantSpan qs : prob.quantities) { int index = prob.ta.getTokenIdFromCharacterOffset(qs.start); if (index >= cons.getStartSpan() && index < cons.getEndSpan()) { allow = false; break; } } if (allow) { npListQuantRemoved.add(cons); } } return npListQuantRemoved; }