List<Util.Pair<Integer, Integer>> getCandidates( Document document, int arg2Line, int connStart, int connEnd, int arg1Line) { List<Util.Pair<Integer, Integer>> candidates = new ArrayList<Util.Pair<Integer, Integer>>(); int distance = 10; Sentence arg2Sentence = document.getSentence(arg2Line); // String conn = arg2Sentence.toString(connStart, connEnd).toLowerCase(); // String category = connAnalyzer.getCategory(conn); int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd); SimpleDepGraph arg2DepGraph = document.getDepGraph(arg2Line); List<Integer> reachable = arg2DepGraph.getReachableIndices(connHeadPos, false, distance); for (Integer i : reachable) { if (arg2Sentence.get(i).getTag("POS").matches("VB.*|NNS?|JJ.*|MD")) { candidates.add(new Util.Pair<Integer, Integer>(arg2Line, i)); } } Tree mainHead = headAnalyzer.getCollinsHead(arg2Sentence.getParseTree().getChild(0)); if (mainHead != null) { int mainHeadPos = treeAnalyzer.getLeafPosition(arg2Sentence.getParseTree(), mainHead); List<String> pathAsList = arg2DepGraph.getPathAsList(connHeadPos, mainHeadPos, false); if (pathAsList != null) { distance = distance - (1 + pathAsList.size()); } else { // System.out.println("No path from connHead to mainHead!"); distance--; } } // if (arg1Line == arg2Line) return candidates; for (int i = arg2Line - 1; i >= 0 && distance >= 0; i--) { Sentence sentence = document.getSentence(i); SimpleDepGraph depGraph = document.getDepGraph(i); mainHead = headAnalyzer.getCollinsHead(sentence.getParseTree().getChild(0)); if (mainHead == null) { distance--; continue; } int mainHeadPos = treeAnalyzer.getLeafPosition(sentence.getParseTree(), mainHead); reachable = depGraph.getReachableIndices(mainHeadPos, false, distance); if (reachable == null) { distance--; continue; } for (Integer j : reachable) { if (sentence.get(j).getTag("POS").matches("VB.*|NNS?|JJ.*|MD")) { candidates.add(new Util.Pair<Integer, Integer>(i, j)); } } distance -= 2; } return candidates; }
private PropertyList addDependencyFeatures( PropertyList pl, Document doc, Pair<Integer, Integer> candidate, int arg2Line, int arg2HeadPos, int connStart, int connEnd) { Sentence arg2Sentence = doc.getSentence(arg2Line); String conn = arg2Sentence.toString(connStart, connEnd); String category = connAnalyzer.getCategory(conn.toLowerCase()); int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd); int arg1Line = candidate.first(); Tree arg1Tree = doc.getTree(arg1Line); int arg1HeadPos = candidate.second(); List<String> path = new ArrayList<String>(); if (arg1Line == arg2Line) { SimpleDepGraph depGraph = doc.getDepGraph(arg1Line); List<String> tmpPath = depGraph.getPathAsList(connHeadPos, arg1HeadPos, false); if (tmpPath != null) { path.addAll(tmpPath); } else { path.add("null"); } } else { Tree arg2Root = arg2Sentence.getParseTree(); Tree mainHead = headAnalyzer.getCollinsHead(arg2Root.getChild(0)); int mainHeadPos = treeAnalyzer.getLeafPosition(arg2Root, mainHead); if (mainHeadPos != -1) { SimpleDepGraph depGraph = doc.getDepGraph(arg2Line); List<String> tmpPath = depGraph.getPathAsList(connHeadPos, mainHeadPos, false); if (tmpPath != null) { path.addAll(tmpPath); } else { path.add("null"); } } for (int i = 0; i < Math.abs(arg1Line - arg2Line); i++) { path.add("SENT"); } Tree arg1Root = arg1Tree; mainHead = headAnalyzer.getCollinsHead(arg1Root.getChild(0)); mainHeadPos = treeAnalyzer.getLeafPosition(arg1Root, mainHead); if (mainHeadPos != -1) { SimpleDepGraph depGraph = doc.getDepGraph(arg1Line); List<String> tmpPath = depGraph.getPathAsList(mainHeadPos, arg1HeadPos, false); if (tmpPath != null) { path.addAll(tmpPath); } else { path.add("null"); } } } StringBuilder sbPath = new StringBuilder(); StringBuilder sbPathWithoutCC = new StringBuilder(); StringBuilder sbPathWithoutRep = new StringBuilder(); String prev = ""; for (String node : path) { sbPath.append(node).append(":"); if (!node.matches("cc|-cc")) { sbPathWithoutCC.append(node).append(":"); } if (!node.equals(prev)) { sbPathWithoutRep.append(node).append(":"); } prev = node; } // M-dependency path pl = PropertyList.add("M=" + sbPath.toString(), 1.0, pl); // Q-M&C pl = PropertyList.add("Q=" + "CONN-" + conn + '&' + "M-" + sbPath.toString(), 1.0, pl); // T-M&R pl = PropertyList.add("T=" + "CAT-" + category + '&' + "M-" + sbPath.toString(), 1.0, pl); // O-collapsed path without cc pl = PropertyList.add("O=" + sbPathWithoutCC.toString(), 1.0, pl); // P-collapsed path without repetition pl = PropertyList.add("P=" + sbPathWithoutRep.toString(), 1.0, pl); return pl; }
private PropertyList addConstituentFeatures( PropertyList pl, Document doc, Pair<Integer, Integer> candidate, int arg2Line, int arg2HeadPos, int connStart, int connEnd) { Sentence arg2Sentence = doc.getSentence(arg2Line); String conn = arg2Sentence.toString(connStart, connEnd); int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd); int arg1Line = candidate.first(); Tree arg1Tree = doc.getTree(arg1Line); int arg1HeadPos = candidate.second(); List<String> path = new ArrayList<String>(); List<String> pathWithoutPOS = new ArrayList<String>(); if (arg1Line == arg2Line) { Tree root = arg1Tree; List<Tree> leaves = root.getLeaves(); List<Tree> treePath = root.pathNodeToNode(leaves.get(connHeadPos), leaves.get(arg1HeadPos)); if (treePath != null) { for (Tree t : treePath) { if (!t.isLeaf()) { path.add(t.value()); if (!t.isPreTerminal()) { pathWithoutPOS.add(t.value()); } } } } } else { Tree arg2Root = arg2Sentence.getParseTree(); Tree mainHead = headAnalyzer.getCollinsHead(arg2Root.getChild(0)); List<Tree> leaves = arg2Root.getLeaves(); int mainHeadPos = treeAnalyzer.getLeafPosition(arg2Root, mainHead); if (mainHeadPos != -1) { List<Tree> treePath = arg2Root.pathNodeToNode(leaves.get(connHeadPos), leaves.get(mainHeadPos)); if (treePath != null) { for (Tree t : treePath) { if (!t.isLeaf()) { path.add(t.value()); if (!t.isPreTerminal()) { pathWithoutPOS.add(t.value()); } } } } } for (int i = 0; i < Math.abs(arg1Line - arg2Line); i++) { path.add("SENT"); pathWithoutPOS.add("SENT"); } Tree arg1Root = arg1Tree; mainHead = headAnalyzer.getCollinsHead(arg1Root.getChild(0)); leaves = arg1Root.getLeaves(); mainHeadPos = treeAnalyzer.getLeafPosition(arg1Root, mainHead); if (mainHeadPos != -1) { List<Tree> treePath = arg1Root.pathNodeToNode(leaves.get(mainHeadPos), leaves.get(arg1HeadPos)); if (treePath != null) { for (Tree t : treePath) { if (!t.isLeaf()) { path.add(t.value()); if (!t.isPreTerminal()) { pathWithoutPOS.add(t.value()); } } } } } } // H-full path // L-C&H StringBuilder fullPath = new StringBuilder(); for (String node : path) { fullPath.append(node).append(":"); } pl = PropertyList.add("H=" + fullPath.toString(), 1.0, pl); pl = PropertyList.add("L=CONN-" + conn + "&" + "H-" + fullPath.toString(), 1.0, pl); // I-length of path pl = PropertyList.add("I=" + path.size(), 1.0, pl); // J-collapsed path without part of speech // K-collapsed path without repititions fullPath = new StringBuilder(); StringBuilder collapsedPath = new StringBuilder(); String prev = ""; for (String node : pathWithoutPOS) { fullPath.append(node).append(":"); if (!node.equals(prev)) { collapsedPath.append(node).append(":"); } prev = node; } pl = PropertyList.add("J=" + fullPath.toString(), 1.0, pl); pl = PropertyList.add("K=" + collapsedPath.toString(), 1.0, pl); return pl; }