List<Util.Pair<Integer, Integer>> getCandidates( Document document, int arg2Line, int connStart, int connEnd, int arg1Line) { List<Util.Pair<Integer, Integer>> candidates = new ArrayList<Util.Pair<Integer, Integer>>(); int distance = 10; Sentence arg2Sentence = document.getSentence(arg2Line); // String conn = arg2Sentence.toString(connStart, connEnd).toLowerCase(); // String category = connAnalyzer.getCategory(conn); int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd); SimpleDepGraph arg2DepGraph = document.getDepGraph(arg2Line); List<Integer> reachable = arg2DepGraph.getReachableIndices(connHeadPos, false, distance); for (Integer i : reachable) { if (arg2Sentence.get(i).getTag("POS").matches("VB.*|NNS?|JJ.*|MD")) { candidates.add(new Util.Pair<Integer, Integer>(arg2Line, i)); } } Tree mainHead = headAnalyzer.getCollinsHead(arg2Sentence.getParseTree().getChild(0)); if (mainHead != null) { int mainHeadPos = treeAnalyzer.getLeafPosition(arg2Sentence.getParseTree(), mainHead); List<String> pathAsList = arg2DepGraph.getPathAsList(connHeadPos, mainHeadPos, false); if (pathAsList != null) { distance = distance - (1 + pathAsList.size()); } else { // System.out.println("No path from connHead to mainHead!"); distance--; } } // if (arg1Line == arg2Line) return candidates; for (int i = arg2Line - 1; i >= 0 && distance >= 0; i--) { Sentence sentence = document.getSentence(i); SimpleDepGraph depGraph = document.getDepGraph(i); mainHead = headAnalyzer.getCollinsHead(sentence.getParseTree().getChild(0)); if (mainHead == null) { distance--; continue; } int mainHeadPos = treeAnalyzer.getLeafPosition(sentence.getParseTree(), mainHead); reachable = depGraph.getReachableIndices(mainHeadPos, false, distance); if (reachable == null) { distance--; continue; } for (Integer j : reachable) { if (sentence.get(j).getTag("POS").matches("VB.*|NNS?|JJ.*|MD")) { candidates.add(new Util.Pair<Integer, Integer>(i, j)); } } distance -= 2; } return candidates; }
private PropertyList addDependencyFeatures( PropertyList pl, Document doc, Pair<Integer, Integer> candidate, int arg2Line, int arg2HeadPos, int connStart, int connEnd) { Sentence arg2Sentence = doc.getSentence(arg2Line); String conn = arg2Sentence.toString(connStart, connEnd); String category = connAnalyzer.getCategory(conn.toLowerCase()); int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd); int arg1Line = candidate.first(); Tree arg1Tree = doc.getTree(arg1Line); int arg1HeadPos = candidate.second(); List<String> path = new ArrayList<String>(); if (arg1Line == arg2Line) { SimpleDepGraph depGraph = doc.getDepGraph(arg1Line); List<String> tmpPath = depGraph.getPathAsList(connHeadPos, arg1HeadPos, false); if (tmpPath != null) { path.addAll(tmpPath); } else { path.add("null"); } } else { Tree arg2Root = arg2Sentence.getParseTree(); Tree mainHead = headAnalyzer.getCollinsHead(arg2Root.getChild(0)); int mainHeadPos = treeAnalyzer.getLeafPosition(arg2Root, mainHead); if (mainHeadPos != -1) { SimpleDepGraph depGraph = doc.getDepGraph(arg2Line); List<String> tmpPath = depGraph.getPathAsList(connHeadPos, mainHeadPos, false); if (tmpPath != null) { path.addAll(tmpPath); } else { path.add("null"); } } for (int i = 0; i < Math.abs(arg1Line - arg2Line); i++) { path.add("SENT"); } Tree arg1Root = arg1Tree; mainHead = headAnalyzer.getCollinsHead(arg1Root.getChild(0)); mainHeadPos = treeAnalyzer.getLeafPosition(arg1Root, mainHead); if (mainHeadPos != -1) { SimpleDepGraph depGraph = doc.getDepGraph(arg1Line); List<String> tmpPath = depGraph.getPathAsList(mainHeadPos, arg1HeadPos, false); if (tmpPath != null) { path.addAll(tmpPath); } else { path.add("null"); } } } StringBuilder sbPath = new StringBuilder(); StringBuilder sbPathWithoutCC = new StringBuilder(); StringBuilder sbPathWithoutRep = new StringBuilder(); String prev = ""; for (String node : path) { sbPath.append(node).append(":"); if (!node.matches("cc|-cc")) { sbPathWithoutCC.append(node).append(":"); } if (!node.equals(prev)) { sbPathWithoutRep.append(node).append(":"); } prev = node; } // M-dependency path pl = PropertyList.add("M=" + sbPath.toString(), 1.0, pl); // Q-M&C pl = PropertyList.add("Q=" + "CONN-" + conn + '&' + "M-" + sbPath.toString(), 1.0, pl); // T-M&R pl = PropertyList.add("T=" + "CAT-" + category + '&' + "M-" + sbPath.toString(), 1.0, pl); // O-collapsed path without cc pl = PropertyList.add("O=" + sbPathWithoutCC.toString(), 1.0, pl); // P-collapsed path without repetition pl = PropertyList.add("P=" + sbPathWithoutRep.toString(), 1.0, pl); return pl; }
private PropertyList addConstituentFeatures( PropertyList pl, Document doc, Pair<Integer, Integer> candidate, int arg2Line, int arg2HeadPos, int connStart, int connEnd) { Sentence arg2Sentence = doc.getSentence(arg2Line); String conn = arg2Sentence.toString(connStart, connEnd); int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd); int arg1Line = candidate.first(); Tree arg1Tree = doc.getTree(arg1Line); int arg1HeadPos = candidate.second(); List<String> path = new ArrayList<String>(); List<String> pathWithoutPOS = new ArrayList<String>(); if (arg1Line == arg2Line) { Tree root = arg1Tree; List<Tree> leaves = root.getLeaves(); List<Tree> treePath = root.pathNodeToNode(leaves.get(connHeadPos), leaves.get(arg1HeadPos)); if (treePath != null) { for (Tree t : treePath) { if (!t.isLeaf()) { path.add(t.value()); if (!t.isPreTerminal()) { pathWithoutPOS.add(t.value()); } } } } } else { Tree arg2Root = arg2Sentence.getParseTree(); Tree mainHead = headAnalyzer.getCollinsHead(arg2Root.getChild(0)); List<Tree> leaves = arg2Root.getLeaves(); int mainHeadPos = treeAnalyzer.getLeafPosition(arg2Root, mainHead); if (mainHeadPos != -1) { List<Tree> treePath = arg2Root.pathNodeToNode(leaves.get(connHeadPos), leaves.get(mainHeadPos)); if (treePath != null) { for (Tree t : treePath) { if (!t.isLeaf()) { path.add(t.value()); if (!t.isPreTerminal()) { pathWithoutPOS.add(t.value()); } } } } } for (int i = 0; i < Math.abs(arg1Line - arg2Line); i++) { path.add("SENT"); pathWithoutPOS.add("SENT"); } Tree arg1Root = arg1Tree; mainHead = headAnalyzer.getCollinsHead(arg1Root.getChild(0)); leaves = arg1Root.getLeaves(); mainHeadPos = treeAnalyzer.getLeafPosition(arg1Root, mainHead); if (mainHeadPos != -1) { List<Tree> treePath = arg1Root.pathNodeToNode(leaves.get(mainHeadPos), leaves.get(arg1HeadPos)); if (treePath != null) { for (Tree t : treePath) { if (!t.isLeaf()) { path.add(t.value()); if (!t.isPreTerminal()) { pathWithoutPOS.add(t.value()); } } } } } } // H-full path // L-C&H StringBuilder fullPath = new StringBuilder(); for (String node : path) { fullPath.append(node).append(":"); } pl = PropertyList.add("H=" + fullPath.toString(), 1.0, pl); pl = PropertyList.add("L=CONN-" + conn + "&" + "H-" + fullPath.toString(), 1.0, pl); // I-length of path pl = PropertyList.add("I=" + path.size(), 1.0, pl); // J-collapsed path without part of speech // K-collapsed path without repititions fullPath = new StringBuilder(); StringBuilder collapsedPath = new StringBuilder(); String prev = ""; for (String node : pathWithoutPOS) { fullPath.append(node).append(":"); if (!node.equals(prev)) { collapsedPath.append(node).append(":"); } prev = node; } pl = PropertyList.add("J=" + fullPath.toString(), 1.0, pl); pl = PropertyList.add("K=" + collapsedPath.toString(), 1.0, pl); return pl; }
private PropertyList addBaselineFeatures( PropertyList pl, Document doc, Pair<Integer, Integer> candidate, int arg2Line, int arg2HeadPos, int connStart, int connEnd) { Sentence arg2Sentence = doc.getSentence(arg2Line); int arg1Line = candidate.first(); Sentence arg1Sentence = doc.getSentence(arg1Line); int arg1HeadPos = candidate.second(); String conn = arg2Sentence.toString(connStart, connEnd); // R-connective type String category = connAnalyzer.getCategory(conn.toLowerCase()); pl = PropertyList.add("R=" + category, 1.0, pl); // A-position of the connective String position = "Medial"; if (connStart < 4) position = "Initial"; else if (connEnd >= (arg1Sentence.size() - 3)) position = "Terminal"; pl = PropertyList.add("A=" + position, 1.0, pl); // S-A & R pl = PropertyList.add("S=" + position + "&" + category, 1.0, pl); // C-connective phrase pl = PropertyList.add("C=" + conn, 1.0, pl); // D-downcase conn phrase pl = PropertyList.add("D=" + conn.toLowerCase(), 1.0, pl); // E-argument head word pl = PropertyList.add("E=" + arg1Sentence.get(arg1HeadPos).word(), 1.0, pl); // B-same sentence or not pl = PropertyList.add("B=" + (arg1Line == arg2Line), 1.0, pl); // G-A&B pl = PropertyList.add("A=" + position + "&" + "B=" + (arg1Line == arg2Line), 1.0, pl); // F-arg1 head prior or after conn if (arg1Line < arg2Line || arg1HeadPos < connStart) { pl = PropertyList.add("F=<", 1.0, pl); } else { pl = PropertyList.add("F=>", 1.0, pl); } // if (1 < 2) return pl; // Z1-relative position of arg1-conn-arg2 String z = null; if (arg1Line < arg2Line) { if (arg2HeadPos < connStart) z = "ARG1-ARG2-CONN"; else z = "ARG1-CONN-ARG2"; } else if (arg1HeadPos < connStart) { if (arg2HeadPos < arg1HeadPos) z = "ARG2-ARG1-CONN"; else if (arg2HeadPos < connStart) z = "ARG1-ARG2-CONN"; else z = "ARG1-CONN-ARG2"; } else if (arg2HeadPos < connStart) { z = "ARG2-CONN-ARG1"; } else if (arg2HeadPos < arg1HeadPos) { z = "CONN-ARG2-ARG1"; } else { z = "CONN-ARG1-ARG2"; } pl = PropertyList.add("Z=" + z, 1.0, pl); // Z2-Conn&Z1 pl = PropertyList.add("CONN=" + conn + "&" + "Z=" + z, 1.0, pl); return pl; }
private void addInstancesThroughPipe( PDTBRelation relation, Document document, int arg1Line, int arg2Line, InstanceList instanceList) { // System.out.println("Relation: " + relation.toString()); // System.out.println("arg1Line: " + arg1Line); // System.out.println("arg2Line: " + arg2Line); String connectiveGornAddress = relation.getConnectiveGornAddress(); Tree arg2Tree = document.getTree(arg2Line); List<Tree> connHeadLeaves = connAnalyzer.getConnHeadLeaves(arg2Tree, connectiveGornAddress, relation.getConnHead()); if (connHeadLeaves.isEmpty()) return; int connStart = treeAnalyzer.getLeafPosition(arg2Tree, connHeadLeaves.get(0)); int connEnd = treeAnalyzer.getLeafPosition(arg2Tree, connHeadLeaves.get(connHeadLeaves.size() - 1)); if ((connEnd - connStart) > 4) { // handle if..else, etc. connEnd = connStart; } // consider only the first sentence in case of multi-line argument1 String arg1GornAddress = relation.getArg1GornAddress(); Tree arg1Tree = document.getTree(arg1Line); List<Tree> arg1GornNodes = getArgGornNodes(arg1Tree, arg1Line, arg1GornAddress); Tree syntacticHead = headAnalyzer.getSyntacticHead(arg1Tree, arg1GornNodes); int arg1HeadPos = treeAnalyzer.getLeafPosition(arg1Tree, syntacticHead); String arg2GornAddress = relation.getArg2GornAddress(); List<Tree> arg2GornNodes = getArgGornNodes(arg2Tree, arg2Line, arg2GornAddress); Tree arg2SyntacticHead = headAnalyzer.getSyntacticHead(arg2Tree, arg2GornNodes); int arg2HeadPos = treeAnalyzer.getLeafPosition(arg2Tree, arg2SyntacticHead); if (arg2HeadPos == -1) { System.out.println("arg2Head == -1"); return; } if (arg1HeadPos == -1) { System.out.println("arg1Head == -1"); return; } int trueCandidate = -1; List<Pair<Integer, Integer>> candidates = getCandidates(document, arg2Line, connStart, connEnd, arg1Line); for (int i = 0; i < candidates.size(); i++) { Pair<Integer, Integer> candidate = candidates.get(i); if (candidate.first() == arg1Line && candidate.second() == arg1HeadPos) { trueCandidate = i; break; } } if (trueCandidate == -1) { // trueCandidate = candidates.size(); // candidates.add(new Pair<Integer, Integer>(arg1Line, arg1HeadPos)); // System.out.println("Covered!"); System.out.println("true candidate == -1!!!"); System.out.println(syntacticHead.value()); } else { int extractArg2 = ARG2_EXTRACTOR.extractArg2( document.getSentence(arg2Line), document.getTree(arg2Line), document.getDepGraph(arg2Line), connStart, connEnd); if (extractArg2 == -1) { extractArg2 = 0; System.out.println("Arg2 == -1!!!!!!!!!!!!!!!!!"); } // Arg1RankInstance instance = new Arg1RankInstance(document, candidates, arg2Line, // extractArg2, connStart, connEnd, trueCandidate); Arg1RankInstance instance = new Arg1RankInstance( document, candidates, arg2Line, arg2HeadPos, connStart, connEnd, trueCandidate); instanceList.addThruPipe(instance); } }
/** * Shows accuracy according to Ben Wellner's definition of accuracy * * @param classifier * @param instanceList */ private void showAccuracy(Classifier classifier, InstanceList instanceList) throws IOException { int total = instanceList.size(); int correct = 0; HashMap<String, Integer> errorMap = new HashMap<String, Integer>(); FileWriter errorWriter = new FileWriter("arg1Error.log"); for (Instance instance : instanceList) { Classification classification = classifier.classify(instance); if (classification.bestLabelIsCorrect()) { correct++; } else { Arg1RankInstance rankInstance = (Arg1RankInstance) instance; Document doc = rankInstance.getDocument(); Sentence s = doc.getSentence(rankInstance.getArg2Line()); String conn = s.toString(rankInstance.getConnStart(), rankInstance.getConnEnd()).toLowerCase(); // String category = connAnalyzer.getCategory(conn); if (errorMap.containsKey(conn)) { errorMap.put(conn, errorMap.get(conn) + 1); } else { errorMap.put(conn, 1); } int arg2Line = rankInstance.getArg2Line(); int arg1Line = rankInstance.getCandidates().get(rankInstance.getTrueArg1Candidate()).first(); int arg1HeadPos = rankInstance.getCandidates().get(rankInstance.getTrueArg1Candidate()).second(); int predictedCandidateIndex = Integer.parseInt(classification.getLabeling().getBestLabel().toString()); if (arg1Line == arg2Line) { errorWriter.write("FileName: " + doc.getFileName() + "\n"); errorWriter.write("Sentential\n"); errorWriter.write("Conn: " + conn + "\n"); errorWriter.write("Arg1Head: " + s.get(arg1HeadPos).word() + "\n"); errorWriter.write(s.toString() + "\n\n"); } else { errorWriter.write("FileName: " + doc.getFileName() + "\n"); errorWriter.write("Inter-Sentential\n"); errorWriter.write("Arg1 in : " + arg1Line + "\n"); errorWriter.write("Arg2 in : " + arg2Line + "\n"); errorWriter.write("Conn: " + conn + "\n"); errorWriter.write(s.toString() + "\n"); Sentence s1 = doc.getSentence(arg1Line); errorWriter.write("Arg1Head: " + s1.get(arg1HeadPos) + "\n"); errorWriter.write(s1.toString() + "\n\n"); } int predictedArg1Line = rankInstance.getCandidates().get(predictedCandidateIndex).first(); int predictedArg1HeadPos = rankInstance.getCandidates().get(predictedCandidateIndex).second(); Sentence pSentence = doc.getSentence(predictedArg1Line); errorWriter.write( "Predicted arg1 sentence: " + pSentence.toString() + " [Correct: " + (predictedArg1Line == arg1Line) + "]\n"); errorWriter.write("Predicted head: " + pSentence.get(predictedArg1HeadPos).word() + "\n\n"); } } errorWriter.close(); Set<Entry<String, Integer>> entrySet = errorMap.entrySet(); List<Entry<String, Integer>> list = new ArrayList<Entry<String, Integer>>(entrySet); Collections.sort( list, new Comparator<Entry<String, Integer>>() { @Override public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) { if (o1.getValue() > o2.getValue()) return -1; else if (o1.getValue() < o2.getValue()) return 1; return 0; } }); for (Entry<String, Integer> item : list) { System.out.println(item.getKey() + "-" + item.getValue()); } System.out.println("Total: " + total); System.out.println("Correct: " + correct); System.out.println("Accuracy: " + (1.0 * correct) / total); }