Ejemplo n.º 1
0
 /**
  * Dispatches every direct child of the sentence root to the matching handler.
  *
  * <p>Leaf children are handed to {@code processLeaf} as non-intermediate tokens tagged
  * {@code OTHER}; non-leaf children are handed to {@code processNode} with no inherited tag.
  * A {@code null} root is ignored.
  *
  * @param root the root node of the parsed sentence; may be {@code null}
  * @param sentence list forwarded unchanged to the leaf/node handlers
  * @param tags list forwarded unchanged to the leaf/node handlers
  * @param target list forwarded unchanged to the leaf/node handlers
  */
 protected void processRoot(
     SentenceParser.Node root, List<String> sentence, List<String> tags, List<String> target) {
   if (root == null) {
     return;
   }

   for (TreeElement child : root.getElements()) {
     if (child.isLeaf()) {
       // a leaf hanging directly off the root never belongs to a phrase
       processLeaf((SentenceParser.Leaf) child, false, OTHER, sentence, tags, target);
     } else {
       // top-level phrases start without any tag inherited from a parent
       processNode((SentenceParser.Node) child, sentence, tags, target, null);
     }
   }
 }
Ejemplo n.º 2
0
  /**
   * Recursively walks a non-leaf node, choosing a chunk tag for each leaf below it and
   * delegating the actual output to {@code processLeaf}.
   *
   * <p>The node's own chunk tag comes from {@code getChunkTag(node)}. When that is
   * {@code OTHER} and the caller supplied an inherited tag, the inherited tag is used instead.
   * The working tag can later be reset to {@code OTHER} while iterating the children (see the
   * non-leaf branch), which affects every child processed after that point.
   *
   * @param node the node whose children are processed
   * @param sentence forwarded to {@code processLeaf} and to recursive calls
   *     (presumably the token accumulator; confirm against {@code processLeaf})
   * @param tags forwarded to {@code processLeaf}, {@code isIntermediate} and recursive calls
   *     (presumably the POS-tag accumulator; confirm against {@code processLeaf})
   * @param target forwarded to {@code processLeaf}, {@code isIntermediate} and recursive calls;
   *     also read here to inspect the entries appended by a recursive call
   * @param inheritedTag chunk tag of the parent node, or {@code null} when there is none
   */
  private void processNode(
      SentenceParser.Node node,
      List<String> sentence,
      List<String> tags,
      List<String> target,
      String inheritedTag) {
    String phraseTag = getChunkTag(node);

    // Fall back to the parent's tag when this node has no chunk tag of its own.
    boolean inherited = false;
    if (phraseTag.equals(OTHER) && inheritedTag != null) {
      phraseTag = inheritedTag;
      inherited = true;
    }

    TreeElement[] elements = node.getElements();
    for (int i = 0; i < elements.length; i++) {
      if (elements[i].isLeaf()) {
        boolean isIntermediate = false;
        String tag = phraseTag;
        SentenceParser.Leaf leaf = (SentenceParser.Leaf) elements[i];

        // A non-null chunk tag reported for the leaf itself overrides the phrase tag.
        String localChunk = getChunkTag(leaf);
        if (localChunk != null && !tag.equals(localChunk)) {
          tag = localChunk;
        }

        // The leaf continues a chunk only if the helper agrees AND it is either not the
        // first child of this node or the tag was inherited from the parent.
        if (isIntermediate(tags, target, tag) && (inherited || i > 0)) {
          isIntermediate = true;
        }
        // When punctuation is excluded, a leaf without a functional tag is forced to OTHER
        // unless it has a leaf sibling on BOTH sides (i.e. it sits strictly inside a run of
        // leaves). NOTE(review): presumably "no functional tag" identifies punctuation;
        // confirm against SentenceParser.Leaf.
        if (!isIncludePunctuations()
            && leaf.getFunctionalTag() == null
            && (!(i + 1 < elements.length && elements[i + 1].isLeaf())
                || !(i > 0 && elements[i - 1].isLeaf()))) {
          isIntermediate = false;
          tag = OTHER;
        }
        processLeaf(leaf, isIntermediate, tag, sentence, tags, target);
      } else {
        // Remember where the child's output starts so only its entries are inspected below.
        int before = target.size();
        processNode((SentenceParser.Node) elements[i], sentence, tags, target, phraseTag);

        // if the child node was of a different type we should break the chunk sequence
        // Scans the entries the recursion appended (last to first); a single entry not ending
        // in "-<phraseTag>" resets phraseTag to OTHER for all remaining children of this node.
        for (int j = target.size() - 1; j >= before; j--) {
          if (!target.get(j).endsWith("-" + phraseTag)) {
            phraseTag = OTHER;
            break;
          }
        }
      }
    }
  }
Ejemplo n.º 3
0
  /**
   * Derives the chunk tag for a node from its syntactic tag.
   *
   * <p>The label is the text after the last {@code ':'} of the syntactic tag (the whole tag
   * when there is no colon), with any trailing {@code '-'} characters removed. The labels
   * {@code np}, {@code vp}, {@code pp}, {@code ap}, {@code advp} and {@code adjp} are returned
   * upper-cased; every other label yields {@code OTHER}.
   *
   * @param node the node to inspect
   * @return the upper-cased phrase label, or {@code OTHER} when the label is not recognized
   */
  protected String getChunkTag(SentenceParser.Node node) {
    final String syntacticTag = node.getSyntacticTag();

    // keep only what follows the last ':' separator
    String label = syntacticTag.substring(syntacticTag.lastIndexOf(":") + 1);

    // strip any run of trailing hyphens
    int end = label.length();
    while (end > 0 && label.charAt(end - 1) == '-') {
      end--;
    }
    label = label.substring(0, end);

    // np, vp and pp are the core chunk types; ap, advp and adjp are kept as well
    switch (label) {
      case "np":
      case "vp":
      case "pp":
      case "ap":
      case "advp":
      case "adjp":
        return label.toUpperCase();
      default:
        return OTHER;
    }
  }