Ejemplo n.º 1
0
 // TODO: roll check into tokens regex pattern?
 // That allows for better matching because unmatched sequences will be eliminated at match time
 /**
  * Applies the part-of-speech filter to the matched phrase {@code tokens[start, end)}.
  *
  * <p>When no {@code validPosPattern} is configured, every phrase passes. Otherwise the outcome
  * depends on {@code posMatchType}:
  * <ul>
  *   <li>{@code MATCH_ONE_TOKEN_PHRASE_ONLY}: phrases longer than one token pass unconditionally;
  *       a one-token phrase passes only if its POS tag matches the pattern.</li>
  *   <li>{@code MATCH_AT_LEAST_ONE_TOKEN}: passes if at least one token in the phrase has a POS
  *       tag that fully matches the pattern.</li>
  *   <li>{@code MATCH_ALL_TOKENS}: always passes here (the original note says this case is
  *       checked elsewhere).</li>
  *   <li>Any other value: passes.</li>
  * </ul>
  *
  * @param tokens tokens the match was found in; {@code start} and {@code end} index into it
  * @param start index of the first token of the matched phrase (inclusive)
  * @param end index just past the last token of the matched phrase (exclusive)
  * @return {@code true} if the phrase is acceptable with respect to its POS tags
  */
 private boolean checkPosTags(List<CoreLabel> tokens, int start, int end) {
   if (validPosPattern == null) {
     // No POS restriction configured.
     return true;
   }
   switch (posMatchType) {
     case MATCH_ONE_TOKEN_PHRASE_ONLY:
       // The restriction only applies to one-token phrases, so measure the matched span
       // itself rather than the length of the whole token list it was found in.
       if (end - start > 1) {
         return true;
       }
       // fall through
     case MATCH_AT_LEAST_ONE_TOKEN:
       for (int i = start; i < end; i++) {
         String pos = tokens.get(i).get(CoreAnnotations.PartOfSpeechAnnotation.class);
         // Tokens without a POS tag can never satisfy the pattern.
         if (pos != null && validPosPattern.matcher(pos).matches()) {
           return true;
         }
       }
       return false;
     case MATCH_ALL_TOKENS:
       // Checked elsewhere
       return true;
     default:
       // Unknown match type: do not filter anything out.
       return true;
   }
 }
Ejemplo n.º 2
0
  /**
   * Finds the non-overlapping pattern matches in {@code tokens} and, for each one, either sets
   * the NER tag of the entry's annotate group to the entry's type or leaves the existing tags
   * alone.
   *
   * <p>A match is applied only when both {@code checkPosTags} and {@code checkOrigNerTags}
   * accept the matched span. Rejected matches are reported on standard error when
   * {@code verbose} is set.
   *
   * @param tokens tokens to match against and annotate in place
   */
  private void annotateMatched(List<CoreLabel> tokens) {
    List<SequenceMatchResult<CoreMap>> results = multiPatternMatcher.findNonOverlapping(tokens);
    for (SequenceMatchResult<CoreMap> result : results) {
      Entry entry = patternToEntry.get(result.pattern());

      // Span of the capture group this entry wants annotated.
      int group = entry.annotateGroup;
      int from = result.start(group);
      int to = result.end(group);

      // The NER check only runs when the POS check has already accepted the span.
      boolean replaceExisting =
          checkPosTags(tokens, from, to) && checkOrigNerTags(entry, tokens, from, to);

      if (replaceExisting) {
        for (int idx = from; idx < to; idx++) {
          tokens.get(idx).set(CoreAnnotations.NamedEntityTagAnnotation.class, entry.type);
        }
        continue;
      }

      if (!verbose) {
        continue;
      }
      String existingTags =
          StringUtils.joinFields(
              result.groupNodes(group), CoreAnnotations.NamedEntityTagAnnotation.class);
      String message =
          "Not annotating  '"
              + result.group(group)
              + "': "
              + existingTags
              + " with "
              + entry.type
              + ", sentence is '"
              + StringUtils.joinWords(tokens, " ")
              + '\'';
      System.err.println(message);
    }
  }
 /**
  * Returns the value stored under {@code CoreAnnotations.TextAnnotation} for one element.
  *
  * @param list elements to read from
  * @param index position of the element in {@code list}
  * @return the element's text annotation value, as returned by its {@code get} call
  */
 @SuppressWarnings("unused")
 private static String getText(List<? extends CoreMap> list, int index) {
   CoreMap element = list.get(index);
   return element.get(CoreAnnotations.TextAnnotation.class);
 }
 /**
  * Returns the value stored under {@code TimeExpression.Annotation} for one element.
  *
  * @param list elements to read from
  * @param index position of the element in {@code list}
  * @return the element's time expression, as returned by its {@code get} call
  */
 private static TimeExpression getTimeExpression(List<? extends CoreMap> list, int index) {
   CoreMap element = list.get(index);
   return element.get(TimeExpression.Annotation.class);
 }
Ejemplo n.º 5
0
  /**
   * Decides whether the NER tags already present on {@code tokens[start, end)} may be replaced
   * by the tag of {@code entry}.
   *
   * <p>The decision proceeds in this order:
   * <ol>
   *   <li>If the span cuts across a neighbouring entity (the token just before the span carries
   *       the same tag as the first token, or the token just after carries the same tag as the
   *       last token, and that tag is not in {@code myLabels}), do not overwrite.</li>
   *   <li>If the first token has no NER tag, overwrite.</li>
   *   <li>If the tokens in the span do not all share the first token's tag, overwrite.</li>
   *   <li>If the shared tag is in {@code entry.overwritableTypes}, overwrite.</li>
   *   <li>Otherwise, unless {@code entry.type} is in {@code noDefaultOverwriteLabels},
   *       overwrite exactly when the shared tag is in {@code myLabels}.</li>
   * </ol>
   *
   * @param entry the entry whose type would be written
   * @param tokens tokens containing the matched span
   * @param start index of the first token of the span (inclusive)
   * @param end index just past the last token of the span (exclusive)
   * @return {@code true} if the existing tags on the span may be overwritten
   */
  private boolean checkOrigNerTags(Entry entry, List<CoreLabel> tokens, int start, int end) {
    final String firstTag = tokens.get(start).ner();
    final String lastTag = tokens.get(end - 1).ner();

    // Detect a match that partially overlaps existing entities, e.g.
    //   tag1 tag1 x   x  tag2 tag2
    //        tag tag tag tag
    // In that situation the old labels must be kept.

    // Walk left from the span while tokens keep the first token's (foreign) tag.
    int left = start - 1;
    if (firstTag != null && !myLabels.contains(firstTag)) {
      while (left >= 0 && firstTag.equals(tokens.get(left).ner())) {
        left--;
      }
    }

    // Walk right from the span while tokens keep the last token's (foreign) tag.
    int right = end;
    if (lastTag != null && !myLabels.contains(lastTag)) {
      while (right < tokens.size() && lastTag.equals(tokens.get(right).ner())) {
        right++;
      }
    }

    if (left != start - 1 || right != end) {
      // Cutting across an already recognized entity: leave it undisturbed.
      return false;
    }
    if (firstTag == null) {
      // Nothing was tagged before, so replacing is safe.
      return true;
    }

    // A span with mixed tags is always overwritten.
    for (int i = start + 1; i < end; i++) {
      if (!firstTag.equals(tokens.get(i).ner())) {
        return true;
      }
    }

    // One consistent tag: overwrite if this entry explicitly lists it as overwritable.
    if (entry.overwritableTypes.contains(firstTag)) {
      return true;
    }
    // Entry types listed in noDefaultOverwriteLabels never use the default rule below.
    if (noDefaultOverwriteLabels.contains(entry.type)) {
      return false;
    }
    // Default rule: only tags in myLabels may be replaced.
    return myLabels.contains(firstTag);
  }