/**
 * Returns the index of {@code a} within {@code classes}, adding it first when it is not already
 * present.
 *
 * @param a the ambiguity class to look up or register
 * @return the value of {@code classes.indexOf(a)} after {@code a} is guaranteed to be present
 */
private int add(AmbiguityClass a) {
  boolean alreadyPresent = classes.contains(a);
  if (!alreadyPresent) {
    classes.add(a);
  }
  return classes.indexOf(a);
}
/** * Adds dependencies to list depList. These are in terms of the original tag set not the reduced * (projected) tag set. */ protected static EndHead treeToDependencyHelper( Tree tree, List<IntDependency> depList, int loc, Index<String> wordIndex, Index<String> tagIndex) { // try { // PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out,"GB18030"),true); // tree.pennPrint(pw); // } // catch (UnsupportedEncodingException e) {} if (tree.isLeaf() || tree.isPreTerminal()) { EndHead tempEndHead = new EndHead(); tempEndHead.head = loc; tempEndHead.end = loc + 1; return tempEndHead; } Tree[] kids = tree.children(); if (kids.length == 1) { return treeToDependencyHelper(kids[0], depList, loc, wordIndex, tagIndex); } EndHead tempEndHead = treeToDependencyHelper(kids[0], depList, loc, wordIndex, tagIndex); int lHead = tempEndHead.head; int split = tempEndHead.end; tempEndHead = treeToDependencyHelper(kids[1], depList, tempEndHead.end, wordIndex, tagIndex); int end = tempEndHead.end; int rHead = tempEndHead.head; String hTag = ((HasTag) tree.label()).tag(); String lTag = ((HasTag) kids[0].label()).tag(); String rTag = ((HasTag) kids[1].label()).tag(); String hWord = ((HasWord) tree.label()).word(); String lWord = ((HasWord) kids[0].label()).word(); String rWord = ((HasWord) kids[1].label()).word(); boolean leftHeaded = hWord.equals(lWord); String aTag = (leftHeaded ? rTag : lTag); String aWord = (leftHeaded ? rWord : lWord); int hT = tagIndex.indexOf(hTag); int aT = tagIndex.indexOf(aTag); int hW = (wordIndex.contains(hWord) ? wordIndex.indexOf(hWord) : wordIndex.indexOf(Lexicon.UNKNOWN_WORD)); int aW = (wordIndex.contains(aWord) ? wordIndex.indexOf(aWord) : wordIndex.indexOf(Lexicon.UNKNOWN_WORD)); int head = (leftHeaded ? lHead : rHead); int arg = (leftHeaded ? rHead : lHead); IntDependency dependency = new IntDependency( hW, hT, aW, aT, leftHeaded, (leftHeaded ? 
split - head - 1 : head - split)); depList.add(dependency); IntDependency stopL = new IntDependency( aW, aT, STOP_WORD_INT, STOP_TAG_INT, false, (leftHeaded ? arg - split : arg - loc)); depList.add(stopL); IntDependency stopR = new IntDependency( aW, aT, STOP_WORD_INT, STOP_TAG_INT, true, (leftHeaded ? end - arg - 1 : split - arg - 1)); depList.add(stopR); // System.out.println("Adding: "+dependency+" at "+tree.label()); tempEndHead.head = head; return tempEndHead; }
/**
 * Reports whether a word is in the lexicon. This works while the lexicon is still being compiled,
 * because it consults the current counters rather than the compiled rulesWithWord array.
 *
 * <p>The word is first checked against {@code wordIndex}; only a word already contained there is
 * converted to its index and looked up in {@code seenCounter}. (Historical note: an earlier
 * version reportedly inserted unseen words into the word numberer as a side effect of this test.)
 *
 * @param word The word as a String
 * @return {@code true} if the word is in {@code wordIndex} and its count in {@code seenCounter},
 *     paired with {@code nullTag}, is greater than zero; {@code false} otherwise
 */
public boolean isKnown(String word) {
  return wordIndex.contains(word)
      && seenCounter.getCount(new IntTaggedWord(wordIndex.indexOf(word), nullTag)) > 0.0;
}