List<Util.Pair<Integer, Integer>> getCandidates( Document document, int arg2Line, int connStart, int connEnd, int arg1Line) { List<Util.Pair<Integer, Integer>> candidates = new ArrayList<Util.Pair<Integer, Integer>>(); int distance = 10; Sentence arg2Sentence = document.getSentence(arg2Line); // String conn = arg2Sentence.toString(connStart, connEnd).toLowerCase(); // String category = connAnalyzer.getCategory(conn); int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd); SimpleDepGraph arg2DepGraph = document.getDepGraph(arg2Line); List<Integer> reachable = arg2DepGraph.getReachableIndices(connHeadPos, false, distance); for (Integer i : reachable) { if (arg2Sentence.get(i).getTag("POS").matches("VB.*|NNS?|JJ.*|MD")) { candidates.add(new Util.Pair<Integer, Integer>(arg2Line, i)); } } Tree mainHead = headAnalyzer.getCollinsHead(arg2Sentence.getParseTree().getChild(0)); if (mainHead != null) { int mainHeadPos = treeAnalyzer.getLeafPosition(arg2Sentence.getParseTree(), mainHead); List<String> pathAsList = arg2DepGraph.getPathAsList(connHeadPos, mainHeadPos, false); if (pathAsList != null) { distance = distance - (1 + pathAsList.size()); } else { // System.out.println("No path from connHead to mainHead!"); distance--; } } // if (arg1Line == arg2Line) return candidates; for (int i = arg2Line - 1; i >= 0 && distance >= 0; i--) { Sentence sentence = document.getSentence(i); SimpleDepGraph depGraph = document.getDepGraph(i); mainHead = headAnalyzer.getCollinsHead(sentence.getParseTree().getChild(0)); if (mainHead == null) { distance--; continue; } int mainHeadPos = treeAnalyzer.getLeafPosition(sentence.getParseTree(), mainHead); reachable = depGraph.getReachableIndices(mainHeadPos, false, distance); if (reachable == null) { distance--; continue; } for (Integer j : reachable) { if (sentence.get(j).getTag("POS").matches("VB.*|NNS?|JJ.*|MD")) { candidates.add(new Util.Pair<Integer, Integer>(i, j)); } } distance -= 2; } return candidates; }
/**
 * Adds the lexico-syntactic features U, V, W, X and (conditionally) Y for one candidate.
 *
 * <ul>
 *   <li>{@code U} - whether the word at the candidate position matches any pattern in
 *       {@code attributiveVerb};</li>
 *   <li>{@code V} - whether any dependency from {@code getGovDependencies(position)} has the
 *       relation {@code "ccomp"};</li>
 *   <li>{@code W} - the conjunction string of U and V;</li>
 *   <li>{@code X} - whether any dependency from {@code getDepDependencies(position)} has the
 *       relation {@code "ccomp"};</li>
 *   <li>{@code Y} - only when X holds: X combined with whether the word at that dependency's
 *       {@code gov()} index matches any pattern in {@code attributiveVerb}.</li>
 * </ul>
 * Every feature is added with value 1.0.
 *
 * @param pl         property list to extend
 * @param doc        document providing the parse tree and dependency graph
 * @param candidate  (sentence line, token index) of the candidate
 * @param arg2Line   not used by this method
 * @param arg2HeadPos not used by this method
 * @param connStart  not used by this method
 * @param connEnd    not used by this method
 * @return the property list with the new features added
 */
private PropertyList addLexicoSyntacticFeatures(
        PropertyList pl,
        Document doc,
        Pair<Integer, Integer> candidate,
        int arg2Line,
        int arg2HeadPos,
        int connStart,
        int connEnd) {
    int sentenceLine = candidate.first();
    Tree tree = doc.getTree(sentenceLine);
    int headIndex = candidate.second();

    // U: is the candidate word itself an attributive verb?
    String headWord = tree.getLeaves().get(headIndex).value();
    boolean attributive = matchesAttributiveVerb(headWord);
    pl = PropertyList.add("U=" + attributive, 1.0, pl);

    // V / W: "ccomp" among the gov-dependencies of the candidate position.
    SimpleDepGraph graph = doc.getDepGraph(sentenceLine);
    boolean hasClausalComp = firstClausalComplement(graph.getGovDependencies(headIndex)) != null;
    pl = PropertyList.add("V=" + hasClausalComp, 1.0, pl);
    pl = PropertyList.add("W=" + attributive + "&" + hasClausalComp, 1.0, pl);

    // X: "ccomp" among the dep-dependencies of the candidate position.
    SimpleDependency clausalComp = firstClausalComplement(graph.getDepDependencies(headIndex));
    boolean isClausalComp = clausalComp != null;
    pl = PropertyList.add("X=" + isClausalComp, 1.0, pl);

    if (!isClausalComp) {
        return pl;
    }

    // Y: is the word at the other end of that "ccomp" dependency an attributive verb?
    int governorIndex = clausalComp.gov();
    String governorWord = tree.getLeaves().get(governorIndex).value();
    boolean governorAttributive = matchesAttributiveVerb(governorWord);
    return PropertyList.add("Y=" + isClausalComp + "&" + governorAttributive, 1.0, pl);
}

/** Returns true when {@code word} fully matches at least one pattern in {@code attributiveVerb}. */
private boolean matchesAttributiveVerb(String word) {
    for (String pattern : attributiveVerb) {
        if (word.matches(pattern)) {
            return true;
        }
    }
    return false;
}

/** Returns the first dependency whose relation is {@code "ccomp"}, or {@code null} if none. */
private SimpleDependency firstClausalComplement(List<SimpleDependency> dependencies) {
    for (SimpleDependency dependency : dependencies) {
        if (dependency.reln().equals("ccomp")) {
            return dependency;
        }
    }
    return null;
}
private PropertyList addDependencyFeatures( PropertyList pl, Document doc, Pair<Integer, Integer> candidate, int arg2Line, int arg2HeadPos, int connStart, int connEnd) { Sentence arg2Sentence = doc.getSentence(arg2Line); String conn = arg2Sentence.toString(connStart, connEnd); String category = connAnalyzer.getCategory(conn.toLowerCase()); int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd); int arg1Line = candidate.first(); Tree arg1Tree = doc.getTree(arg1Line); int arg1HeadPos = candidate.second(); List<String> path = new ArrayList<String>(); if (arg1Line == arg2Line) { SimpleDepGraph depGraph = doc.getDepGraph(arg1Line); List<String> tmpPath = depGraph.getPathAsList(connHeadPos, arg1HeadPos, false); if (tmpPath != null) { path.addAll(tmpPath); } else { path.add("null"); } } else { Tree arg2Root = arg2Sentence.getParseTree(); Tree mainHead = headAnalyzer.getCollinsHead(arg2Root.getChild(0)); int mainHeadPos = treeAnalyzer.getLeafPosition(arg2Root, mainHead); if (mainHeadPos != -1) { SimpleDepGraph depGraph = doc.getDepGraph(arg2Line); List<String> tmpPath = depGraph.getPathAsList(connHeadPos, mainHeadPos, false); if (tmpPath != null) { path.addAll(tmpPath); } else { path.add("null"); } } for (int i = 0; i < Math.abs(arg1Line - arg2Line); i++) { path.add("SENT"); } Tree arg1Root = arg1Tree; mainHead = headAnalyzer.getCollinsHead(arg1Root.getChild(0)); mainHeadPos = treeAnalyzer.getLeafPosition(arg1Root, mainHead); if (mainHeadPos != -1) { SimpleDepGraph depGraph = doc.getDepGraph(arg1Line); List<String> tmpPath = depGraph.getPathAsList(mainHeadPos, arg1HeadPos, false); if (tmpPath != null) { path.addAll(tmpPath); } else { path.add("null"); } } } StringBuilder sbPath = new StringBuilder(); StringBuilder sbPathWithoutCC = new StringBuilder(); StringBuilder sbPathWithoutRep = new StringBuilder(); String prev = ""; for (String node : path) { sbPath.append(node).append(":"); if (!node.matches("cc|-cc")) { 
sbPathWithoutCC.append(node).append(":"); } if (!node.equals(prev)) { sbPathWithoutRep.append(node).append(":"); } prev = node; } // M-dependency path pl = PropertyList.add("M=" + sbPath.toString(), 1.0, pl); // Q-M&C pl = PropertyList.add("Q=" + "CONN-" + conn + '&' + "M-" + sbPath.toString(), 1.0, pl); // T-M&R pl = PropertyList.add("T=" + "CAT-" + category + '&' + "M-" + sbPath.toString(), 1.0, pl); // O-collapsed path without cc pl = PropertyList.add("O=" + sbPathWithoutCC.toString(), 1.0, pl); // P-collapsed path without repetition pl = PropertyList.add("P=" + sbPathWithoutRep.toString(), 1.0, pl); return pl; }
// added 11-03-2012 int getMainHead(Sentence s, SimpleDepGraph depGraph) { for (int i = 0; i < s.size(); i++) { if (depGraph.getParent(i) == -1) return i; } return -1; }