/**
 * Looks up {@code a} in {@code classes}, registering it first when it is not yet contained.
 *
 * @param a the ambiguity class to look up or register
 * @return the result of {@code classes.indexOf(a)} once {@code a} is present
 */
private int add(AmbiguityClass a) {
  // Only insert when the class has not been seen before; either way the
  // index reported is whatever the collection says afterwards.
  if (!classes.contains(a)) {
    classes.add(a);
  }
  return classes.indexOf(a);
}
  /**
   * Recursively collects dependencies from {@code tree} into {@code depList}. These are in terms
   * of the original tag set, not the reduced (projected) tag set.
   *
   * <p>A leaf or preterminal occupies the single position {@code loc}. A node with one child is
   * skipped over. For any other node only the first two children are examined: both are processed
   * recursively (left first), and then three entries are appended for the node itself — the
   * head/argument dependency followed by two STOP dependencies for the argument.
   *
   * @param tree the subtree to process; labels of non-terminal nodes are cast to {@code HasTag}
   *     and {@code HasWord}
   * @param depList the list that receives the generated dependencies
   * @param loc the position assigned to the first word under {@code tree}
   * @param wordIndex word-to-int index; words it does not contain are looked up as {@code
   *     Lexicon.UNKNOWN_WORD}
   * @param tagIndex tag-to-int index
   * @return an {@code EndHead} whose {@code head} is the position of the head word of {@code tree}
   *     and whose {@code end} is the position just past the words covered
   */
  protected static EndHead treeToDependencyHelper(
      Tree tree,
      List<IntDependency> depList,
      int loc,
      Index<String> wordIndex,
      Index<String> tagIndex) {
    // Base case: a word (or its preterminal) covers exactly one position.
    if (tree.isLeaf() || tree.isPreTerminal()) {
      EndHead wordSpan = new EndHead();
      wordSpan.head = loc;
      wordSpan.end = loc + 1;
      return wordSpan;
    }

    Tree[] kids = tree.children();
    // Unary node: contributes no dependency of its own.
    if (kids.length == 1) {
      return treeToDependencyHelper(kids[0], depList, loc, wordIndex, tagIndex);
    }

    Tree leftKid = kids[0];
    Tree rightKid = kids[1];

    // Left child first, then the right child starting where the left one ended.
    EndHead leftSpan = treeToDependencyHelper(leftKid, depList, loc, wordIndex, tagIndex);
    int leftHeadPos = leftSpan.head;
    int split = leftSpan.end;
    EndHead result = treeToDependencyHelper(rightKid, depList, split, wordIndex, tagIndex);
    int end = result.end;
    int rightHeadPos = result.head;

    String parentTag = ((HasTag) tree.label()).tag();
    String leftTag = ((HasTag) leftKid.label()).tag();
    String rightTag = ((HasTag) rightKid.label()).tag();
    String parentWord = ((HasWord) tree.label()).word();
    String leftWord = ((HasWord) leftKid.label()).word();
    String rightWord = ((HasWord) rightKid.label()).word();

    // The node is left-headed when its head word is the left child's head word.
    boolean leftHeaded = parentWord.equals(leftWord);

    String argTag;
    String argWord;
    int head;
    int arg;
    int dependencyDistance;
    int stopLeftDistance;
    int stopRightDistance;
    if (leftHeaded) {
      argTag = rightTag;
      argWord = rightWord;
      head = leftHeadPos;
      arg = rightHeadPos;
      dependencyDistance = split - head - 1;
      stopLeftDistance = arg - split;
      stopRightDistance = end - arg - 1;
    } else {
      argTag = leftTag;
      argWord = leftWord;
      head = rightHeadPos;
      arg = leftHeadPos;
      dependencyDistance = head - split;
      stopLeftDistance = arg - loc;
      stopRightDistance = split - arg - 1;
    }

    int headTagId = tagIndex.indexOf(parentTag);
    int argTagId = tagIndex.indexOf(argTag);

    // Words missing from the index are mapped to the unknown-word entry.
    int headWordId;
    if (wordIndex.contains(parentWord)) {
      headWordId = wordIndex.indexOf(parentWord);
    } else {
      headWordId = wordIndex.indexOf(Lexicon.UNKNOWN_WORD);
    }
    int argWordId;
    if (wordIndex.contains(argWord)) {
      argWordId = wordIndex.indexOf(argWord);
    } else {
      argWordId = wordIndex.indexOf(Lexicon.UNKNOWN_WORD);
    }

    // Head -> argument dependency for this node.
    depList.add(
        new IntDependency(
            headWordId, headTagId, argWordId, argTagId, leftHeaded, dependencyDistance));
    // Two STOP dependencies for the argument: direction flag false first, then true.
    depList.add(
        new IntDependency(
            argWordId, argTagId, STOP_WORD_INT, STOP_TAG_INT, false, stopLeftDistance));
    depList.add(
        new IntDependency(
            argWordId, argTagId, STOP_WORD_INT, STOP_TAG_INT, true, stopRightDistance));

    // Reuse the right child's span object: its end is already this node's end.
    result.head = head;
    return result;
  }
Beispiel #3
0
 /**
  * Reports whether a word is in the lexicon. This version works even while compiling the lexicon
  * with current counters (rather than using the compiled rulesWithWord array).
  *
  * <p>The word is first tested with {@code wordIndex.contains(word)}; a word that is not in the
  * index is reported as unknown straight away. Otherwise the word's index is paired with {@code
  * nullTag} and the word counts as known when {@code seenCounter} holds a positive count for
  * that pair.
  *
  * <p>NOTE: an earlier TODO here asked whether the lookup should avoid inserting into the word
  * index and suggested starting with {@code wordIndex.contains(word)}; that is what this version
  * does.
  *
  * @param word The word as a String
  * @return Whether the word is in the lexicon
  */
 public boolean isKnown(String word) {
   if (wordIndex.contains(word)) {
     int wordId = wordIndex.indexOf(word);
     IntTaggedWord untaggedWord = new IntTaggedWord(wordId, nullTag);
     return seenCounter.getCount(untaggedWord) > 0.0;
   }
   return false;
 }