/**
 * Classifies how a predicate and one of its arguments are related at the
 * bunsetsu level of the dependency tree.
 *
 * <ul>
 *   <li>{@code SAME_PHRASE} - both nodes belong to the same bunsetsu;</li>
 *   <li>{@code DEP} - one bunsetsu is the head of the other, in either
 *       direction (the argument's bunsetsu attaches to the predicate's, or
 *       the predicate's bunsetsu attaches to the argument's);</li>
 *   <li>{@code ZERO_INTRA} - none of the above.</li>
 * </ul>
 *
 * @param tree dependency tree containing both nodes
 * @param pIdx node id of the predicate
 * @param aIdx node id of the argument
 * @return the dependency type of the predicate/argument pair
 */
public static DepType getDepType(DependencyTree tree, int pIdx, int aIdx) {
    Bunsetsu pBun = tree.getBunsetsuFromNodeId(pIdx);
    Bunsetsu aBun = tree.getBunsetsuFromNodeId(aIdx);
    Bunsetsu pBunHeadBun = tree.getBunsetsuFromId(pBun.getHead());
    Bunsetsu aBunHeadBun = tree.getBunsetsuFromId(aBun.getHead());

    if (pBun.getId() == aBun.getId()) {
        return DepType.SAME_PHRASE;
    } else if (pBun.getId() == aBunHeadBun.getId()) {
        // The argument's bunsetsu attaches to the predicate's bunsetsu.
        return DepType.DEP;
    } else if (pBunHeadBun.getId() == aBun.getId()) {
        // The predicate's bunsetsu attaches to the argument's bunsetsu.
        // This used to compare the two heads with each other, which labelled
        // sibling bunsetsus as DEP and never detected this direction.
        return DepType.DEP;
    } else {
        return DepType.ZERO_INTRA;
    }
}
/**
 * Reports where an argument node sits relative to the head node of the
 * bunsetsu that contains it.
 *
 * @param tree dependency tree containing the argument node
 * @param aIdx node id of the argument
 * @return {@code HEAD} when the argument is the head node itself,
 *         {@code HEAD_LEFT} when its id is one less than the head node's id,
 *         {@code HEAD_RIGHT} when its id is one greater, and {@code OTHER}
 *         in every remaining case
 */
public static ArgPositionType getArgPositionType(DependencyTree tree, int aIdx) {
    DependencyNode headNode = tree.getBunsetsuFromNodeId(aIdx).getHeadNode();

    // Signed distance of the argument from the head node, measured in node ids.
    int offset = aIdx - headNode.getId();
    switch (offset) {
        case 0:
            return ArgPositionType.HEAD;
        case -1:
            return ArgPositionType.HEAD_LEFT;
        case 1:
            return ArgPositionType.HEAD_RIGHT;
        default:
            return ArgPositionType.OTHER;
    }
}
public static void main(String[] args) { // String file = args[0]; IntInt2IntHashMap lEr = new IntInt2IntHashMap(); HashMap<String, TObjectIntHashMap<String>> stat = new HashMap<String, TObjectIntHashMap<String>>(); try { HashSet<String> headPat = PatternFileParser.parse(new File("ipa_head_pat.txt")); HashSet<String> funcPat = PatternFileParser.parse(new File("ipa_func_pat.txt")); CaboCha2Dep pipe = new CaboCha2Dep(System.in); JapaneseDependencyTree2CaboCha caboChaOutPipe = new JapaneseDependencyTree2CaboCha(); caboChaOutPipe.setFormat(CaboChaFormat.OLD); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(System.out, "utf-8")); while (!pipe.eof()) { DependencyTree tree = pipe.pipePerSentence(); if (tree == null) { continue; } JapaneseDependencyTreeLib.setBunsetsuHead(tree, funcPat, headPat); PredicateArgumentStructure[] pasList = tree.getPASList(); for (int j = 0; j < pasList.length; j++) { int predId = pasList[j].getPredicateId(); String predType = pasList[j].predicateType; int[] aIds = pasList[j].getIds(); String[] aLabels = pasList[j].getLabels(); for (int k = 0; k < aIds.length; k++) { DepType dt = getDepType(tree, predId, aIds[k]); ArgPositionType apt = getArgPositionType(tree, aIds[k]); if (!stat.containsKey(aLabels[k])) { stat.put(aLabels[k], new TObjectIntHashMap<String>()); } TObjectIntHashMap<String> inner = stat.get(aLabels[k]); if (!inner.containsKey(dt.toString() + ":" + apt.toString())) { inner.put(dt.toString() + ":" + apt.toString(), 0); } inner.increment(dt.toString() + ":" + apt.toString()); aLabels[k] += ":" + dt + ":" + apt; } } StringBuilder resultStr = new StringBuilder(); resultStr.append(caboChaOutPipe.pipePerSentence(tree)); writer.write(resultStr.toString()); } // print statistics for (Iterator it = stat.keySet().iterator(); it.hasNext(); ) { String key = (String) it.next(); TObjectIntHashMap inner = stat.get(key); for (TObjectIntIterator iit = inner.iterator(); iit.hasNext(); ) { iit.advance(); System.err.print(key + "\t" + 
iit.key() + "\t" + iit.value() + "\n"); } } } catch (IOException e) { e.printStackTrace(); } }