/**
 * Adds lexico-syntactic features for an Arg1 head candidate to the given feature list.
 *
 * <p>Each feature is added with value {@code 1.0}; the feature name encodes the observation:
 *
 * <ul>
 *   <li>{@code U=} whether the candidate head word matches one of the {@code attributiveVerb}
 *       regular expressions;
 *   <li>{@code V=} whether any dependency returned by {@code getGovDependencies} for the head
 *       position has relation {@code ccomp};
 *   <li>{@code W=} the conjunction of the {@code U} and {@code V} values;
 *   <li>{@code X=} whether any dependency returned by {@code getDepDependencies} for the head
 *       position has relation {@code ccomp};
 *   <li>{@code Y=} (only when {@code X} is true) {@code X} conjoined with whether the governor
 *       word of that {@code ccomp} dependency matches an {@code attributiveVerb} expression.
 * </ul>
 *
 * @param pl feature list to extend
 * @param doc document providing the parse tree and dependency graph of the candidate sentence
 * @param candidate pair of (sentence line, head leaf position) for the Arg1 candidate
 * @param arg2Line not used by this method
 * @param arg2HeadPos not used by this method
 * @param connStart not used by this method
 * @param connEnd not used by this method
 * @return the feature list with the features above prepended via {@code PropertyList.add}
 */
private PropertyList addLexicoSyntacticFeatures(
    PropertyList pl,
    Document doc,
    Pair<Integer, Integer> candidate,
    int arg2Line,
    int arg2HeadPos,
    int connStart,
    int connEnd) {
  int arg1Line = candidate.first();
  Tree root = doc.getTree(arg1Line);
  int arg1HeadPos = candidate.second();
  // Fetch the leaves once: they are needed for the head word and, possibly, the ccomp governor.
  List<Tree> leaves = root.getLeaves();

  boolean attributive = matchesAttributiveVerb(leaves.get(arg1HeadPos).value());
  pl = PropertyList.add("U=" + attributive, 1.0, pl);

  SimpleDepGraph depGraph = doc.getDepGraph(arg1Line);

  boolean hasClausalComp =
      findClausalComplement(depGraph.getGovDependencies(arg1HeadPos)) != null;
  pl = PropertyList.add("V=" + hasClausalComp, 1.0, pl);
  pl = PropertyList.add("W=" + attributive + "&" + hasClausalComp, 1.0, pl);

  SimpleDependency clausalComp =
      findClausalComplement(depGraph.getDepDependencies(arg1HeadPos));
  boolean isClausalComp = clausalComp != null;
  pl = PropertyList.add("X=" + isClausalComp, 1.0, pl);

  if (isClausalComp) {
    int gov = clausalComp.gov();
    boolean isGovAttributive = matchesAttributiveVerb(leaves.get(gov).value());
    pl = PropertyList.add("Y=" + isClausalComp + "&" + isGovAttributive, 1.0, pl);
  }
  return pl;
}

/**
 * Returns whether {@code word} fully matches any of the regular expressions in
 * {@code attributiveVerb}.
 */
private boolean matchesAttributiveVerb(String word) {
  for (String verb : attributiveVerb) {
    if (word.matches(verb)) {
      return true;
    }
  }
  return false;
}

/**
 * Returns the first dependency in {@code dependencies} whose relation is {@code ccomp}, or
 * {@code null} when there is none.
 */
private SimpleDependency findClausalComplement(List<SimpleDependency> dependencies) {
  for (SimpleDependency dep : dependencies) {
    if (dep.reln().equals("ccomp")) {
      return dep;
    }
  }
  return null;
}
private PropertyList addConstituentFeatures( PropertyList pl, Document doc, Pair<Integer, Integer> candidate, int arg2Line, int arg2HeadPos, int connStart, int connEnd) { Sentence arg2Sentence = doc.getSentence(arg2Line); String conn = arg2Sentence.toString(connStart, connEnd); int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd); int arg1Line = candidate.first(); Tree arg1Tree = doc.getTree(arg1Line); int arg1HeadPos = candidate.second(); List<String> path = new ArrayList<String>(); List<String> pathWithoutPOS = new ArrayList<String>(); if (arg1Line == arg2Line) { Tree root = arg1Tree; List<Tree> leaves = root.getLeaves(); List<Tree> treePath = root.pathNodeToNode(leaves.get(connHeadPos), leaves.get(arg1HeadPos)); if (treePath != null) { for (Tree t : treePath) { if (!t.isLeaf()) { path.add(t.value()); if (!t.isPreTerminal()) { pathWithoutPOS.add(t.value()); } } } } } else { Tree arg2Root = arg2Sentence.getParseTree(); Tree mainHead = headAnalyzer.getCollinsHead(arg2Root.getChild(0)); List<Tree> leaves = arg2Root.getLeaves(); int mainHeadPos = treeAnalyzer.getLeafPosition(arg2Root, mainHead); if (mainHeadPos != -1) { List<Tree> treePath = arg2Root.pathNodeToNode(leaves.get(connHeadPos), leaves.get(mainHeadPos)); if (treePath != null) { for (Tree t : treePath) { if (!t.isLeaf()) { path.add(t.value()); if (!t.isPreTerminal()) { pathWithoutPOS.add(t.value()); } } } } } for (int i = 0; i < Math.abs(arg1Line - arg2Line); i++) { path.add("SENT"); pathWithoutPOS.add("SENT"); } Tree arg1Root = arg1Tree; mainHead = headAnalyzer.getCollinsHead(arg1Root.getChild(0)); leaves = arg1Root.getLeaves(); mainHeadPos = treeAnalyzer.getLeafPosition(arg1Root, mainHead); if (mainHeadPos != -1) { List<Tree> treePath = arg1Root.pathNodeToNode(leaves.get(mainHeadPos), leaves.get(arg1HeadPos)); if (treePath != null) { for (Tree t : treePath) { if (!t.isLeaf()) { path.add(t.value()); if (!t.isPreTerminal()) { pathWithoutPOS.add(t.value()); } } } } } } 
// H-full path // L-C&H StringBuilder fullPath = new StringBuilder(); for (String node : path) { fullPath.append(node).append(":"); } pl = PropertyList.add("H=" + fullPath.toString(), 1.0, pl); pl = PropertyList.add("L=CONN-" + conn + "&" + "H-" + fullPath.toString(), 1.0, pl); // I-length of path pl = PropertyList.add("I=" + path.size(), 1.0, pl); // J-collapsed path without part of speech // K-collapsed path without repititions fullPath = new StringBuilder(); StringBuilder collapsedPath = new StringBuilder(); String prev = ""; for (String node : pathWithoutPOS) { fullPath.append(node).append(":"); if (!node.equals(prev)) { collapsedPath.append(node).append(":"); } prev = node; } pl = PropertyList.add("J=" + fullPath.toString(), 1.0, pl); pl = PropertyList.add("K=" + collapsedPath.toString(), 1.0, pl); return pl; }
private PropertyList addDependencyFeatures( PropertyList pl, Document doc, Pair<Integer, Integer> candidate, int arg2Line, int arg2HeadPos, int connStart, int connEnd) { Sentence arg2Sentence = doc.getSentence(arg2Line); String conn = arg2Sentence.toString(connStart, connEnd); String category = connAnalyzer.getCategory(conn.toLowerCase()); int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd); int arg1Line = candidate.first(); Tree arg1Tree = doc.getTree(arg1Line); int arg1HeadPos = candidate.second(); List<String> path = new ArrayList<String>(); if (arg1Line == arg2Line) { SimpleDepGraph depGraph = doc.getDepGraph(arg1Line); List<String> tmpPath = depGraph.getPathAsList(connHeadPos, arg1HeadPos, false); if (tmpPath != null) { path.addAll(tmpPath); } else { path.add("null"); } } else { Tree arg2Root = arg2Sentence.getParseTree(); Tree mainHead = headAnalyzer.getCollinsHead(arg2Root.getChild(0)); int mainHeadPos = treeAnalyzer.getLeafPosition(arg2Root, mainHead); if (mainHeadPos != -1) { SimpleDepGraph depGraph = doc.getDepGraph(arg2Line); List<String> tmpPath = depGraph.getPathAsList(connHeadPos, mainHeadPos, false); if (tmpPath != null) { path.addAll(tmpPath); } else { path.add("null"); } } for (int i = 0; i < Math.abs(arg1Line - arg2Line); i++) { path.add("SENT"); } Tree arg1Root = arg1Tree; mainHead = headAnalyzer.getCollinsHead(arg1Root.getChild(0)); mainHeadPos = treeAnalyzer.getLeafPosition(arg1Root, mainHead); if (mainHeadPos != -1) { SimpleDepGraph depGraph = doc.getDepGraph(arg1Line); List<String> tmpPath = depGraph.getPathAsList(mainHeadPos, arg1HeadPos, false); if (tmpPath != null) { path.addAll(tmpPath); } else { path.add("null"); } } } StringBuilder sbPath = new StringBuilder(); StringBuilder sbPathWithoutCC = new StringBuilder(); StringBuilder sbPathWithoutRep = new StringBuilder(); String prev = ""; for (String node : path) { sbPath.append(node).append(":"); if (!node.matches("cc|-cc")) { 
sbPathWithoutCC.append(node).append(":"); } if (!node.equals(prev)) { sbPathWithoutRep.append(node).append(":"); } prev = node; } // M-dependency path pl = PropertyList.add("M=" + sbPath.toString(), 1.0, pl); // Q-M&C pl = PropertyList.add("Q=" + "CONN-" + conn + '&' + "M-" + sbPath.toString(), 1.0, pl); // T-M&R pl = PropertyList.add("T=" + "CAT-" + category + '&' + "M-" + sbPath.toString(), 1.0, pl); // O-collapsed path without cc pl = PropertyList.add("O=" + sbPathWithoutCC.toString(), 1.0, pl); // P-collapsed path without repetition pl = PropertyList.add("P=" + sbPathWithoutRep.toString(), 1.0, pl); return pl; }
private void addInstancesThroughPipe( PDTBRelation relation, Document document, int arg1Line, int arg2Line, InstanceList instanceList) { // System.out.println("Relation: " + relation.toString()); // System.out.println("arg1Line: " + arg1Line); // System.out.println("arg2Line: " + arg2Line); String connectiveGornAddress = relation.getConnectiveGornAddress(); Tree arg2Tree = document.getTree(arg2Line); List<Tree> connHeadLeaves = connAnalyzer.getConnHeadLeaves(arg2Tree, connectiveGornAddress, relation.getConnHead()); if (connHeadLeaves.isEmpty()) return; int connStart = treeAnalyzer.getLeafPosition(arg2Tree, connHeadLeaves.get(0)); int connEnd = treeAnalyzer.getLeafPosition(arg2Tree, connHeadLeaves.get(connHeadLeaves.size() - 1)); if ((connEnd - connStart) > 4) { // handle if..else, etc. connEnd = connStart; } // consider only the first sentence in case of multi-line argument1 String arg1GornAddress = relation.getArg1GornAddress(); Tree arg1Tree = document.getTree(arg1Line); List<Tree> arg1GornNodes = getArgGornNodes(arg1Tree, arg1Line, arg1GornAddress); Tree syntacticHead = headAnalyzer.getSyntacticHead(arg1Tree, arg1GornNodes); int arg1HeadPos = treeAnalyzer.getLeafPosition(arg1Tree, syntacticHead); String arg2GornAddress = relation.getArg2GornAddress(); List<Tree> arg2GornNodes = getArgGornNodes(arg2Tree, arg2Line, arg2GornAddress); Tree arg2SyntacticHead = headAnalyzer.getSyntacticHead(arg2Tree, arg2GornNodes); int arg2HeadPos = treeAnalyzer.getLeafPosition(arg2Tree, arg2SyntacticHead); if (arg2HeadPos == -1) { System.out.println("arg2Head == -1"); return; } if (arg1HeadPos == -1) { System.out.println("arg1Head == -1"); return; } int trueCandidate = -1; List<Pair<Integer, Integer>> candidates = getCandidates(document, arg2Line, connStart, connEnd, arg1Line); for (int i = 0; i < candidates.size(); i++) { Pair<Integer, Integer> candidate = candidates.get(i); if (candidate.first() == arg1Line && candidate.second() == arg1HeadPos) { trueCandidate = i; break; } } if 
(trueCandidate == -1) { // trueCandidate = candidates.size(); // candidates.add(new Pair<Integer, Integer>(arg1Line, arg1HeadPos)); // System.out.println("Covered!"); System.out.println("true candidate == -1!!!"); System.out.println(syntacticHead.value()); } else { int extractArg2 = ARG2_EXTRACTOR.extractArg2( document.getSentence(arg2Line), document.getTree(arg2Line), document.getDepGraph(arg2Line), connStart, connEnd); if (extractArg2 == -1) { extractArg2 = 0; System.out.println("Arg2 == -1!!!!!!!!!!!!!!!!!"); } // Arg1RankInstance instance = new Arg1RankInstance(document, candidates, arg2Line, // extractArg2, connStart, connEnd, trueCandidate); Arg1RankInstance instance = new Arg1RankInstance( document, candidates, arg2Line, arg2HeadPos, connStart, connEnd, trueCandidate); instanceList.addThruPipe(instance); } }