List<Util.Pair<Integer, Integer>> getCandidates( Document document, int arg2Line, int connStart, int connEnd, int arg1Line) { List<Util.Pair<Integer, Integer>> candidates = new ArrayList<Util.Pair<Integer, Integer>>(); int distance = 10; Sentence arg2Sentence = document.getSentence(arg2Line); // String conn = arg2Sentence.toString(connStart, connEnd).toLowerCase(); // String category = connAnalyzer.getCategory(conn); int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd); SimpleDepGraph arg2DepGraph = document.getDepGraph(arg2Line); List<Integer> reachable = arg2DepGraph.getReachableIndices(connHeadPos, false, distance); for (Integer i : reachable) { if (arg2Sentence.get(i).getTag("POS").matches("VB.*|NNS?|JJ.*|MD")) { candidates.add(new Util.Pair<Integer, Integer>(arg2Line, i)); } } Tree mainHead = headAnalyzer.getCollinsHead(arg2Sentence.getParseTree().getChild(0)); if (mainHead != null) { int mainHeadPos = treeAnalyzer.getLeafPosition(arg2Sentence.getParseTree(), mainHead); List<String> pathAsList = arg2DepGraph.getPathAsList(connHeadPos, mainHeadPos, false); if (pathAsList != null) { distance = distance - (1 + pathAsList.size()); } else { // System.out.println("No path from connHead to mainHead!"); distance--; } } // if (arg1Line == arg2Line) return candidates; for (int i = arg2Line - 1; i >= 0 && distance >= 0; i--) { Sentence sentence = document.getSentence(i); SimpleDepGraph depGraph = document.getDepGraph(i); mainHead = headAnalyzer.getCollinsHead(sentence.getParseTree().getChild(0)); if (mainHead == null) { distance--; continue; } int mainHeadPos = treeAnalyzer.getLeafPosition(sentence.getParseTree(), mainHead); reachable = depGraph.getReachableIndices(mainHeadPos, false, distance); if (reachable == null) { distance--; continue; } for (Integer j : reachable) { if (sentence.get(j).getTag("POS").matches("VB.*|NNS?|JJ.*|MD")) { candidates.add(new Util.Pair<Integer, Integer>(i, j)); } } distance -= 2; } return candidates; }
private PropertyList addDependencyFeatures( PropertyList pl, Document doc, Pair<Integer, Integer> candidate, int arg2Line, int arg2HeadPos, int connStart, int connEnd) { Sentence arg2Sentence = doc.getSentence(arg2Line); String conn = arg2Sentence.toString(connStart, connEnd); String category = connAnalyzer.getCategory(conn.toLowerCase()); int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd); int arg1Line = candidate.first(); Tree arg1Tree = doc.getTree(arg1Line); int arg1HeadPos = candidate.second(); List<String> path = new ArrayList<String>(); if (arg1Line == arg2Line) { SimpleDepGraph depGraph = doc.getDepGraph(arg1Line); List<String> tmpPath = depGraph.getPathAsList(connHeadPos, arg1HeadPos, false); if (tmpPath != null) { path.addAll(tmpPath); } else { path.add("null"); } } else { Tree arg2Root = arg2Sentence.getParseTree(); Tree mainHead = headAnalyzer.getCollinsHead(arg2Root.getChild(0)); int mainHeadPos = treeAnalyzer.getLeafPosition(arg2Root, mainHead); if (mainHeadPos != -1) { SimpleDepGraph depGraph = doc.getDepGraph(arg2Line); List<String> tmpPath = depGraph.getPathAsList(connHeadPos, mainHeadPos, false); if (tmpPath != null) { path.addAll(tmpPath); } else { path.add("null"); } } for (int i = 0; i < Math.abs(arg1Line - arg2Line); i++) { path.add("SENT"); } Tree arg1Root = arg1Tree; mainHead = headAnalyzer.getCollinsHead(arg1Root.getChild(0)); mainHeadPos = treeAnalyzer.getLeafPosition(arg1Root, mainHead); if (mainHeadPos != -1) { SimpleDepGraph depGraph = doc.getDepGraph(arg1Line); List<String> tmpPath = depGraph.getPathAsList(mainHeadPos, arg1HeadPos, false); if (tmpPath != null) { path.addAll(tmpPath); } else { path.add("null"); } } } StringBuilder sbPath = new StringBuilder(); StringBuilder sbPathWithoutCC = new StringBuilder(); StringBuilder sbPathWithoutRep = new StringBuilder(); String prev = ""; for (String node : path) { sbPath.append(node).append(":"); if (!node.matches("cc|-cc")) { sbPathWithoutCC.append(node).append(":"); } if (!node.equals(prev)) { sbPathWithoutRep.append(node).append(":"); } prev = node; } // M-dependency path pl = PropertyList.add("M=" + sbPath.toString(), 1.0, pl); // Q-M&C pl = PropertyList.add("Q=" + "CONN-" + conn + '&' + "M-" + sbPath.toString(), 1.0, pl); // T-M&R pl = PropertyList.add("T=" + "CAT-" + category + '&' + "M-" + sbPath.toString(), 1.0, pl); // O-collapsed path without cc pl = PropertyList.add("O=" + sbPathWithoutCC.toString(), 1.0, pl); // P-collapsed path without repetition pl = PropertyList.add("P=" + sbPathWithoutRep.toString(), 1.0, pl); return pl; }
private PropertyList addConstituentFeatures( PropertyList pl, Document doc, Pair<Integer, Integer> candidate, int arg2Line, int arg2HeadPos, int connStart, int connEnd) { Sentence arg2Sentence = doc.getSentence(arg2Line); String conn = arg2Sentence.toString(connStart, connEnd); int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd); int arg1Line = candidate.first(); Tree arg1Tree = doc.getTree(arg1Line); int arg1HeadPos = candidate.second(); List<String> path = new ArrayList<String>(); List<String> pathWithoutPOS = new ArrayList<String>(); if (arg1Line == arg2Line) { Tree root = arg1Tree; List<Tree> leaves = root.getLeaves(); List<Tree> treePath = root.pathNodeToNode(leaves.get(connHeadPos), leaves.get(arg1HeadPos)); if (treePath != null) { for (Tree t : treePath) { if (!t.isLeaf()) { path.add(t.value()); if (!t.isPreTerminal()) { pathWithoutPOS.add(t.value()); } } } } } else { Tree arg2Root = arg2Sentence.getParseTree(); Tree mainHead = headAnalyzer.getCollinsHead(arg2Root.getChild(0)); List<Tree> leaves = arg2Root.getLeaves(); int mainHeadPos = treeAnalyzer.getLeafPosition(arg2Root, mainHead); if (mainHeadPos != -1) { List<Tree> treePath = arg2Root.pathNodeToNode(leaves.get(connHeadPos), leaves.get(mainHeadPos)); if (treePath != null) { for (Tree t : treePath) { if (!t.isLeaf()) { path.add(t.value()); if (!t.isPreTerminal()) { pathWithoutPOS.add(t.value()); } } } } } for (int i = 0; i < Math.abs(arg1Line - arg2Line); i++) { path.add("SENT"); pathWithoutPOS.add("SENT"); } Tree arg1Root = arg1Tree; mainHead = headAnalyzer.getCollinsHead(arg1Root.getChild(0)); leaves = arg1Root.getLeaves(); mainHeadPos = treeAnalyzer.getLeafPosition(arg1Root, mainHead); if (mainHeadPos != -1) { List<Tree> treePath = arg1Root.pathNodeToNode(leaves.get(mainHeadPos), leaves.get(arg1HeadPos)); if (treePath != null) { for (Tree t : treePath) { if (!t.isLeaf()) { path.add(t.value()); if (!t.isPreTerminal()) { pathWithoutPOS.add(t.value()); } } } } } } // H-full path // L-C&H StringBuilder fullPath = new StringBuilder(); for (String node : path) { fullPath.append(node).append(":"); } pl = PropertyList.add("H=" + fullPath.toString(), 1.0, pl); pl = PropertyList.add("L=CONN-" + conn + "&" + "H-" + fullPath.toString(), 1.0, pl); // I-length of path pl = PropertyList.add("I=" + path.size(), 1.0, pl); // J-collapsed path without part of speech // K-collapsed path without repititions fullPath = new StringBuilder(); StringBuilder collapsedPath = new StringBuilder(); String prev = ""; for (String node : pathWithoutPOS) { fullPath.append(node).append(":"); if (!node.equals(prev)) { collapsedPath.append(node).append(":"); } prev = node; } pl = PropertyList.add("J=" + fullPath.toString(), 1.0, pl); pl = PropertyList.add("K=" + collapsedPath.toString(), 1.0, pl); return pl; }
private void addInstancesThroughPipe( PDTBRelation relation, Document document, int arg1Line, int arg2Line, InstanceList instanceList) { // System.out.println("Relation: " + relation.toString()); // System.out.println("arg1Line: " + arg1Line); // System.out.println("arg2Line: " + arg2Line); String connectiveGornAddress = relation.getConnectiveGornAddress(); Tree arg2Tree = document.getTree(arg2Line); List<Tree> connHeadLeaves = connAnalyzer.getConnHeadLeaves(arg2Tree, connectiveGornAddress, relation.getConnHead()); if (connHeadLeaves.isEmpty()) return; int connStart = treeAnalyzer.getLeafPosition(arg2Tree, connHeadLeaves.get(0)); int connEnd = treeAnalyzer.getLeafPosition(arg2Tree, connHeadLeaves.get(connHeadLeaves.size() - 1)); if ((connEnd - connStart) > 4) { // handle if..else, etc. connEnd = connStart; } // consider only the first sentence in case of multi-line argument1 String arg1GornAddress = relation.getArg1GornAddress(); Tree arg1Tree = document.getTree(arg1Line); List<Tree> arg1GornNodes = getArgGornNodes(arg1Tree, arg1Line, arg1GornAddress); Tree syntacticHead = headAnalyzer.getSyntacticHead(arg1Tree, arg1GornNodes); int arg1HeadPos = treeAnalyzer.getLeafPosition(arg1Tree, syntacticHead); String arg2GornAddress = relation.getArg2GornAddress(); List<Tree> arg2GornNodes = getArgGornNodes(arg2Tree, arg2Line, arg2GornAddress); Tree arg2SyntacticHead = headAnalyzer.getSyntacticHead(arg2Tree, arg2GornNodes); int arg2HeadPos = treeAnalyzer.getLeafPosition(arg2Tree, arg2SyntacticHead); if (arg2HeadPos == -1) { System.out.println("arg2Head == -1"); return; } if (arg1HeadPos == -1) { System.out.println("arg1Head == -1"); return; } int trueCandidate = -1; List<Pair<Integer, Integer>> candidates = getCandidates(document, arg2Line, connStart, connEnd, arg1Line); for (int i = 0; i < candidates.size(); i++) { Pair<Integer, Integer> candidate = candidates.get(i); if (candidate.first() == arg1Line && candidate.second() == arg1HeadPos) { trueCandidate = i; break; } } if (trueCandidate == -1) { // trueCandidate = candidates.size(); // candidates.add(new Pair<Integer, Integer>(arg1Line, arg1HeadPos)); // System.out.println("Covered!"); System.out.println("true candidate == -1!!!"); System.out.println(syntacticHead.value()); } else { int extractArg2 = ARG2_EXTRACTOR.extractArg2( document.getSentence(arg2Line), document.getTree(arg2Line), document.getDepGraph(arg2Line), connStart, connEnd); if (extractArg2 == -1) { extractArg2 = 0; System.out.println("Arg2 == -1!!!!!!!!!!!!!!!!!"); } // Arg1RankInstance instance = new Arg1RankInstance(document, candidates, arg2Line, // extractArg2, connStart, connEnd, trueCandidate); Arg1RankInstance instance = new Arg1RankInstance( document, candidates, arg2Line, arg2HeadPos, connStart, connEnd, trueCandidate); instanceList.addThruPipe(instance); } }