コード例 #1
0
  // carry out the actual annotations on the given span of text in the
  // document.
  protected void annotateText(Document doc, AnnotationSet outputAS, long from, long to) {
    String text = "";
    try {
      text = doc.getContent().getContent(from, to).toString();
    } catch (InvalidOffsetException ex) {
      throw new GateRuntimeException("Unexpected offset exception, offsets are " + from + "/" + to);
    }
    // send the text to the service and get back the response
    // System.out.println("Annotating text: "+text);
    // System.out.println("Starting offset is "+from);

    // NOTE: there is a bug in the TagMe service which causes offset errors
    // if we use the tweet mode and there are certain patterns in the tweet.
    // The approach recommended by Francesco Piccinno is to replace those
    // patterns by spaces.
    if (getIsTweet()) {
      logger.debug("Text before cleaning: >>" + text + "<<");
      // replace
      text = text.replaceAll(patternStringRT3, "    ");
      text = text.replaceAll(patternStringRT2, "   ");
      text = text.replaceAll(patternHashTag, " $1");
      // now replace the remaining patterns by spaces
      StringBuilder sb = new StringBuilder(text);
      Matcher m = patternUrl.matcher(text);
      while (m.find()) {
        int start = m.start();
        int end = m.end();
        sb.replace(start, end, nSpaces(end - start));
      }
      m = patternUser.matcher(text);
      while (m.find()) {
        int start = m.start();
        int end = m.end();
        sb.replace(start, end, nSpaces(end - start));
      }
      text = sb.toString();
      logger.debug("Text after cleaning:  >>" + text + "<<");
    }
    TagMeAnnotation[] tagmeAnnotations = getTagMeAnnotations(text);
    for (TagMeAnnotation tagmeAnn : tagmeAnnotations) {
      if (tagmeAnn.rho >= minrho) {
        FeatureMap fm = Factory.newFeatureMap();
        fm.put("tagMeId", tagmeAnn.id);
        fm.put("title", tagmeAnn.title);
        fm.put("rho", tagmeAnn.rho);
        fm.put("spot", tagmeAnn.spot);
        fm.put("link_probability", tagmeAnn.link_probability);
        if (tagmeAnn.title == null) {
          throw new GateRuntimeException("Odd: got a null title from the TagMe service" + tagmeAnn);
        } else {
          fm.put("inst", "http://dbpedia.org/resource/" + recodeForDbp38(tagmeAnn.title));
        }
        try {
          gate.Utils.addAnn(
              outputAS, from + tagmeAnn.start, from + tagmeAnn.end, getOutputAnnotationType(), fm);
        } catch (Exception ex) {
          System.err.println(
              "Got an exception in document " + doc.getName() + ": " + ex.getLocalizedMessage());
          ex.printStackTrace(System.err);
          System.err.println("from=" + from + ", to=" + to + " TagMeAnn=" + tagmeAnn);
        }
      }
    }
  }