/**
 * Produces a one-token constituent for the token located at the input's end span,
 * i.e. the token immediately following the input constituent.
 *
 * <p>The new constituent has an empty label and an empty view name, lives in the
 * input's {@code TextAnnotation}, and is passed through
 * {@code addPointerToSource(input, ...)} before being returned.
 *
 * <p>NOTE(review): the bound check compares {@code input.getEndSpan()} with
 * {@code sentence.size()}. If the end span is a document-wide token index while
 * {@code size()} is a per-sentence token count, this test is only meaningful for
 * the first sentence — confirm against the {@code Sentence} API.
 *
 * @param input the constituent whose following token is requested
 * @return a singleton list holding the following token's constituent, or an empty
 *         mutable list when the end span is not below the sentence size
 */
@Override
public List<Constituent> transform(Constituent input) {
    int followingToken = input.getEndSpan();
    TextAnnotation annotation = input.getTextAnnotation();
    Sentence enclosing = annotation.getSentence(input.getSentenceId());

    // Guard: nothing to emit once the end span reaches the sentence size.
    if (followingToken >= enclosing.size()) {
        return new ArrayList<>();
    }

    Constituent next = new Constituent("", "", annotation, followingToken, followingToken + 1);
    return Collections.singletonList(addPointerToSource(input, next));
}
public List<Constituent> getConnectedNPs(Problem prob) { List<Constituent> npList = new ArrayList<>(); List<Constituent> npListQuantRemoved = new ArrayList<>(); boolean onlyQuantityInSentence = true; int sentId = prob.ta.getSentenceFromToken(quantPhrase.getStartSpan()).getSentenceId(); for (QuantSpan qs : prob.quantities) { int tokenId = prob.ta.getTokenIdFromCharacterOffset(qs.start); if (prob.ta.getSentenceFromToken(tokenId).getSentenceId() == sentId && !(quantPhrase.getStartSpan() <= tokenId && quantPhrase.getEndSpan() > tokenId)) { onlyQuantityInSentence = false; break; } } // Find NPs from children of verb if (verbPhrase != null) { List<Relation> relations = verbPhrase.getOutgoingRelations(); for (Relation relation : relations) { if (!relation.getRelationName().equals("nsubj")) continue; Constituent dst = relation.getTarget(); for (Constituent cons : prob.chunks) { if (cons.getStartSpan() <= dst.getStartSpan() && cons.getEndSpan() > dst.getStartSpan() && cons.getLabel().equals("NP") && !npList.contains(cons)) { npList.add(cons); subject = cons; break; } } } } // Find NPs from PP NP connection int quantPhraseId = getChunkIndex(prob, quantPhrase.getStartSpan()); if (quantPhraseId + 2 < prob.chunks.size() && !prob.chunks.get(quantPhraseId + 1).getSurfaceForm().trim().equals("of") && prob.chunks.get(quantPhraseId + 1).getLabel().equals("PP") && prob.chunks.get(quantPhraseId + 2).getLabel().equals("NP") && !npList.contains(prob.chunks.get(quantPhraseId + 2))) { npList.add(prob.chunks.get(quantPhraseId + 2)); } if (quantPhraseId - 2 >= 0 && prob.chunks.get(quantPhraseId - 1).getLabel().equals("PP") && prob.chunks.get(quantPhraseId - 2).getLabel().equals("NP") && !npList.contains(prob.chunks.get(quantPhraseId - 2))) { npList.add(prob.chunks.get(quantPhraseId - 2)); } // Get preceding NP if (quantPhraseId - 1 >= 0 && prob.chunks.get(quantPhraseId - 1).getLabel().equals("NP") && !prob.posTags .get(prob.chunks.get(quantPhraseId - 1).getEndSpan()) .getLabel() 
.equals("CC") && !npList.contains(prob.chunks.get(quantPhraseId - 1))) { npList.add(prob.chunks.get(quantPhraseId - 1)); } // Get succeeding NP if (quantPhraseId + 1 < prob.chunks.size() && prob.chunks.get(quantPhraseId + 1).getLabel().equals("NP") && !prob.posTags.get(prob.chunks.get(quantPhraseId).getEndSpan()).getLabel().equals("CC") && !npList.contains(prob.chunks.get(quantPhraseId + 1))) { npList.add(prob.chunks.get(quantPhraseId + 1)); } // If only quantity in sentence, all NPs are connected if (onlyQuantityInSentence) { for (int i = 0; i < prob.chunks.size(); ++i) { Constituent cons = prob.chunks.get(i); if (cons.getSentenceId() == sentId && (i > quantPhraseId + 2 || i < quantPhraseId - 2) && !npList.contains(cons) && cons.getLabel().equals("NP")) { npList.add(cons); } } } // Remove quantity phrases from npList for (Constituent cons : npList) { boolean allow = true; for (QuantSpan qs : prob.quantities) { int index = prob.ta.getTokenIdFromCharacterOffset(qs.start); if (index >= cons.getStartSpan() && index < cons.getEndSpan()) { allow = false; break; } } if (allow) { npListQuantRemoved.add(cons); } } return npListQuantRemoved; }