private InstanceList prepareInstanceList(String[] sections) { InstanceList instanceList = new InstanceList(pipe); // prepare training data for (String section : sections) { File pdtbSection = new File(pdtbRoot, section); File[] pdtbFiles = pdtbSection.listFiles(new FileExtensionFilter(".pipe")); for (File pdtbFile : pdtbFiles) { File ptbFile = new File(ptbRoot, section + "/" + pdtbFile.getName().replace(".pipe", ".mrg")); File depFile = new File(depRoot, section + "/" + pdtbFile.getName().replace(".pipe", ".dep")); List<PDTBRelation> relations = pipedFileReader.read(pdtbFile); Document document = new Document(ptbFile, depFile); for (PDTBRelation relation : relations) { if (relation.getType().equals("Explicit")) { String gornAddress1 = relation.getArg1GornAddress(); if (gornAddress1.equals("") || gornAddress1.contains(";;")) { continue; } GornAddressList gaList1 = new GornAddressList(gornAddress1); int lineNumber1 = gaList1.get(0).getLineNumber(); String connGornAddress = relation.getConnectiveGornAddress(); if (connGornAddress.equals("")) continue; GornAddressList connGAList = new GornAddressList(connGornAddress); String gornAddress2 = relation.getArg2GornAddress(); if (gornAddress2.equals("") || gornAddress2.contains(";;")) { continue; } int lineNumber2 = connGAList.get(0).getLineNumber(); addInstancesThroughPipe(relation, document, lineNumber1, lineNumber2, instanceList); } } } } return instanceList; }
private void addInstancesThroughPipe( PDTBRelation relation, Document document, int arg1Line, int arg2Line, InstanceList instanceList) { // System.out.println("Relation: " + relation.toString()); // System.out.println("arg1Line: " + arg1Line); // System.out.println("arg2Line: " + arg2Line); String connectiveGornAddress = relation.getConnectiveGornAddress(); Tree arg2Tree = document.getTree(arg2Line); List<Tree> connHeadLeaves = connAnalyzer.getConnHeadLeaves(arg2Tree, connectiveGornAddress, relation.getConnHead()); if (connHeadLeaves.isEmpty()) return; int connStart = treeAnalyzer.getLeafPosition(arg2Tree, connHeadLeaves.get(0)); int connEnd = treeAnalyzer.getLeafPosition(arg2Tree, connHeadLeaves.get(connHeadLeaves.size() - 1)); if ((connEnd - connStart) > 4) { // handle if..else, etc. connEnd = connStart; } // consider only the first sentence in case of multi-line argument1 String arg1GornAddress = relation.getArg1GornAddress(); Tree arg1Tree = document.getTree(arg1Line); List<Tree> arg1GornNodes = getArgGornNodes(arg1Tree, arg1Line, arg1GornAddress); Tree syntacticHead = headAnalyzer.getSyntacticHead(arg1Tree, arg1GornNodes); int arg1HeadPos = treeAnalyzer.getLeafPosition(arg1Tree, syntacticHead); String arg2GornAddress = relation.getArg2GornAddress(); List<Tree> arg2GornNodes = getArgGornNodes(arg2Tree, arg2Line, arg2GornAddress); Tree arg2SyntacticHead = headAnalyzer.getSyntacticHead(arg2Tree, arg2GornNodes); int arg2HeadPos = treeAnalyzer.getLeafPosition(arg2Tree, arg2SyntacticHead); if (arg2HeadPos == -1) { System.out.println("arg2Head == -1"); return; } if (arg1HeadPos == -1) { System.out.println("arg1Head == -1"); return; } int trueCandidate = -1; List<Pair<Integer, Integer>> candidates = getCandidates(document, arg2Line, connStart, connEnd, arg1Line); for (int i = 0; i < candidates.size(); i++) { Pair<Integer, Integer> candidate = candidates.get(i); if (candidate.first() == arg1Line && candidate.second() == arg1HeadPos) { trueCandidate = i; break; } } if (trueCandidate == -1) { // trueCandidate = candidates.size(); // candidates.add(new Pair<Integer, Integer>(arg1Line, arg1HeadPos)); // System.out.println("Covered!"); System.out.println("true candidate == -1!!!"); System.out.println(syntacticHead.value()); } else { int extractArg2 = ARG2_EXTRACTOR.extractArg2( document.getSentence(arg2Line), document.getTree(arg2Line), document.getDepGraph(arg2Line), connStart, connEnd); if (extractArg2 == -1) { extractArg2 = 0; System.out.println("Arg2 == -1!!!!!!!!!!!!!!!!!"); } // Arg1RankInstance instance = new Arg1RankInstance(document, candidates, arg2Line, // extractArg2, connStart, connEnd, trueCandidate); Arg1RankInstance instance = new Arg1RankInstance( document, candidates, arg2Line, arg2HeadPos, connStart, connEnd, trueCandidate); instanceList.addThruPipe(instance); } }