Ejemplo n.º 1
0
  /**
   * Adds named entity information to parses.
   *
   * <p>For each span in {@code names}, the parses of its first and last token are looked up in
   * {@code tokens} and their common parent is determined. A new {@code Parse} of type
   * {@code tag} is then inserted into that common parent when one of the following holds:
   * <ul>
   *   <li>the character span of the name equals the span of the common parent;</li>
   *   <li>the name span does not cross the span of any child of the common parent;</li>
   *   <li>the name span crosses a child, the common parent has type {@code "NP"}, its first
   *       child has more than one child, and the name span contains the last of those
   *       grandchildren. In this case the inserted node covers the whole common parent span
   *       rather than just the name span.</li>
   * </ul>
   * Names with no common parent, or matching none of the cases above, are skipped silently.
   *
   * @param tag named entity type
   * @param names spans of tokens that are named entities; every element must be a {@code Span}
   * @param tokens parses for the tokens
   */
  private static void addNames(String tag, List names, Parse[] tokens) {
    for (int i = 0; i < names.size(); i++) {
      Span nameTokenSpan = (Span) names.get(i);
      // NOTE(review): getEnd() is used directly as a token index, i.e. the end of the token
      // span is treated as inclusive here -- confirm against the Span convention in use.
      Parse startToken = tokens[nameTokenSpan.getStart()];
      Parse endToken = tokens[nameTokenSpan.getEnd()];
      Parse commonP = startToken.getCommonParent(endToken);

      if (commonP == null) {
        continue;
      }

      // Span of the entity, built from the spans of its first and last token parses.
      Span nameSpan = new Span(startToken.getSpan().getStart(), endToken.getSpan().getEnd());

      Parse[] kids = null;
      boolean crossingKids = false;

      if (!nameSpan.equals(commonP.getSpan())) {
        // common parent includes the named entity; check whether it cuts through a child
        kids = commonP.getChildren();
        for (int j = 0; j < kids.length; j++) {
          if (nameSpan.crosses(kids[j].getSpan())) {
            crossingKids = true;
            break;
          }
        }
      }

      if (!crossingKids) {
        // common parent matches the named entity exactly, or the entity does not cross children
        commonP.insert(new Parse(commonP.getText(), nameSpan, tag, 1.0));
      } else if (commonP.getType().equals("NP")) {
        // NE crosses children; kids is non-empty here because a crossing child was found
        Parse[] grandKids = kids[0].getChildren();

        // The length check must come before indexing: the last grandchild is only read once
        // the array is known to be non-empty, so a childless first kid is skipped instead of
        // raising ArrayIndexOutOfBoundsException.
        if (grandKids.length > 1
            && nameSpan.contains(grandKids[grandKids.length - 1].getSpan())) {
          commonP.insert(new Parse(commonP.getText(), commonP.getSpan(), tag, 1.0));
        }
      }
    }
  }