예제 #1
0
  /**
   * Spots entity mentions in {@code text}, asks the disambiguator for the best {@code k}
   * candidates of every mention and wraps the outcome in an {@link Annotation}.
   *
   * <p>Candidates are filtered only when {@code Server.getCombinedFilters()} returns a non-null
   * filter; otherwise every candidate produced by the disambiguator is reported as is.
   *
   * <p>NOTE(review): this method used to carry a "TODO Does not do any filtering at the moment"
   * remark. The code below does filter whenever combined filters are configured, so that remark
   * looks obsolete; confirm before relying on either reading.
   *
   * @param text the text to annotate
   * @param confidence confidence value handed to the combined filters
   * @param support support value handed to the combined filters
   * @param ontologyTypes ontology types handed to the combined filters
   * @param sparqlQuery SPARQL query handed to the combined filters
   * @param blacklist {@code true} selects the {@code Blacklist} filter policy, {@code false} the
   *     {@code Whitelist} policy
   * @param coreferenceResolution coreference flag handed to the combined filters
   * @param spotter spotter used to extract the entity mentions
   * @param disambiguator disambiguator used to rank candidates for the mentions
   * @return the annotation for {@code text}; when the spotter finds no mentions it is returned
   *     right after construction, without any spots being set
   * @throws SearchException declared; propagated from the calls made here
   * @throws ItemNotFoundException declared; propagated from the calls made here
   * @throws InputException declared; propagated from the calls made here
   * @throws SpottingException declared; propagated from the calls made here
   */
  public Annotation process(
      String text,
      double confidence,
      int support,
      List<OntologyType> ontologyTypes,
      String sparqlQuery,
      boolean blacklist,
      boolean coreferenceResolution,
      Spotter spotter,
      ParagraphDisambiguatorJ disambiguator)
      throws SearchException, ItemNotFoundException, InputException, SpottingException {

    Annotation annotation = new Annotation(text);
    Text textObject = new Text(text);

    // Tokenization is optional: it only runs when the server exposes a tokenizer.
    if (Server.getTokenizer() != null) {
      Server.getTokenizer().tokenizeMaybe(textObject);
    }

    List<SurfaceFormOccurrence> entityMentions = spotter.extract(textObject);
    if (entityMentions.isEmpty()) {
      // Nothing to disambiguate.
      return annotation;
    }
    Paragraph paragraph = Factory.paragraph().fromJ(entityMentions);
    LOG.info(String.format("Spotted %d entity mentions.", entityMentions.size()));

    Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> entityCandidates =
        disambiguator.bestK(paragraph, k);
    LOG.info(
        String.format(
            "Disambiguated %d candidates with %s.", entityCandidates.size(), disambiguator.name()));

    // Fetch the filters once so the null check and the use see the same instance.
    CombineAllAnnotationFilters filters = Server.getCombinedFilters();
    Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> filteredEntityCandidates =
        entityCandidates;
    if (filters != null) {
      Enumeration.Value listColor =
          blacklist ? FilterPolicy$.MODULE$.Blacklist() : FilterPolicy$.MODULE$.Whitelist();
      filteredEntityCandidates =
          filters.filter(
              entityCandidates,
              confidence,
              support,
              ontologyTypes,
              sparqlQuery,
              listColor,
              coreferenceResolution);
    }

    // Turn every surviving mention and its candidate occurrences into a Spot with Resources.
    List<Spot> spots = new LinkedList<Spot>();
    for (Map.Entry<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> entry :
        filteredEntityCandidates.entrySet()) {
      Spot spot = Spot.getInstance(entry.getKey());
      List<Resource> resources = new LinkedList<Resource>();
      for (DBpediaResourceOccurrence occ : entry.getValue()) {
        resources.add(Resource.getInstance(occ));
      }
      spot.setResources(resources);
      spots.add(spot);
    }
    annotation.setSpots(spots);
    return annotation;
  }
예제 #2
0
  /**
   * Annotation interface: spots entity mentions in {@code text}, retrieves the best {@code k}
   * candidates for each mention, filters every candidate list through an {@code OccsFilter} and
   * returns the surviving mentions as an {@link Annotation}.
   *
   * <p>Mentions whose candidate list is empty after filtering are dropped from the result.
   *
   * @param text the text to annotate
   * @param confidence confidence value; attached to the text as a {@code "confidence"} score
   *     feature and also handed to the filter
   * @param support support value handed to the filter
   * @param ontologyTypesString ontology types, as a string, handed to the filter
   * @param sparqlQuery SPARQL query handed to the filter
   * @param blacklist blacklist flag handed to the filter
   * @param coreferenceResolution ignored: coreference resolution is always disabled in this
   *     method (see the comment in the body)
   * @param spotter spotter used to extract the entity mentions
   * @param disambiguator disambiguator used to rank candidates for the mentions
   * @return the annotation for {@code text}; when the spotter finds no mentions it is returned
   *     right after construction, without any spots being set
   * @throws SearchException declared; propagated from the calls made here
   * @throws ItemNotFoundException declared; propagated from the calls made here
   * @throws InputException declared; propagated from the calls made here
   * @throws SpottingException declared; propagated from the calls made here
   */
  public Annotation process(
      String text,
      double confidence,
      int support,
      String ontologyTypesString,
      String sparqlQuery,
      boolean blacklist,
      boolean coreferenceResolution,
      Spotter spotter,
      ParagraphDisambiguatorJ disambiguator)
      throws SearchException, ItemNotFoundException, InputException, SpottingException {

    Annotation annotation = new Annotation(text);

    Text textObject = new Text(text);
    textObject.setFeature(new Score("confidence", confidence));

    // Tokenization is optional: it only runs when the server exposes a tokenizer.
    if (Server.getTokenizer() != null) {
      Server.getTokenizer().tokenizeMaybe(textObject);
    }

    List<SurfaceFormOccurrence> entityMentions = spotter.extract(textObject);
    if (entityMentions.isEmpty()) {
      // Nothing to disambiguate.
      return annotation;
    }
    Paragraph paragraph = Factory.paragraph().fromJ(entityMentions);
    LOG.info(String.format("Spotted %d entity mentions.", entityMentions.size()));

    Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> entityCandidates =
        disambiguator.bestK(paragraph, k);
    LOG.info(
        String.format(
            "Disambiguated %d candidates with %s.", entityCandidates.size(), disambiguator.name()));

    /*
     * Applying the coreference filter to Candidates requests (a feature whose usefulness was
     * questioned anyway) produced the error described in issue #136. To avoid it, coreference
     * resolution is always switched off here, deliberately ignoring the caller's setting.
     */
    boolean forcedCoreferenceResolution = false;
    FilterElement filter =
        new OccsFilter(
            confidence,
            support,
            ontologyTypesString,
            sparqlQuery,
            blacklist,
            forcedCoreferenceResolution,
            Server.getSimilarityThresholds(),
            Server.getSparqlExecute());

    // Keep only the mentions that still have at least one candidate after filtering.
    Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> filteredEntityCandidates =
        new HashMap<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>>();
    for (Map.Entry<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> entry :
        entityCandidates.entrySet()) {
      List<DBpediaResourceOccurrence> result =
          filter.accept(new FilterOccsImpl(), entry.getValue());
      if (!result.isEmpty()) {
        filteredEntityCandidates.put(entry.getKey(), result);
      }
    }

    // Turn every surviving mention and its candidate occurrences into a Spot with Resources.
    List<Spot> spots = new LinkedList<Spot>();
    for (Map.Entry<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> entry :
        filteredEntityCandidates.entrySet()) {
      Spot spot = Spot.getInstance(entry.getKey());
      List<Resource> resources = new LinkedList<Resource>();
      for (DBpediaResourceOccurrence occ : entry.getValue()) {
        resources.add(Resource.getInstance(occ));
      }
      spot.setResources(resources);
      spots.add(spot);
    }
    annotation.setSpots(spots);
    return annotation;
  }
예제 #3
0
  /**
   * Validates and logs the request parameters, resolves the spotter and disambiguator by name
   * and delegates to {@code process} to build the {@link Annotation}.
   *
   * <p>When the server has no tokenizer, the requested disambiguator is the {@code Default}
   * policy and the text is longer than 1200 characters, the {@code Document} disambiguation
   * policy is used instead.
   *
   * @param text the text to annotate; must contain at least one non-whitespace character
   * @param confidence confidence value forwarded to {@code process}
   * @param support support value forwarded to {@code process}
   * @param ontologyTypesString comma-separated ontology type names; blank entries are skipped
   * @param sparqlQuery SPARQL query forwarded to {@code process}
   * @param policy {@code "blacklist"} (case-insensitive, surrounding whitespace ignored) selects
   *     blacklisting; any other value is treated as {@code "whitelist"}
   * @param coreferenceResolution coreference flag forwarded to {@code process}
   * @param spotterName name used to look up the spotter on the server
   * @param disambiguatorName name used to look up the disambiguator on the server
   * @param clientIp client address; only written to the log
   * @return the annotation produced by {@code process}
   * @throws InputException if {@code text} is empty or whitespace only
   * @throws SearchException declared; propagated from the calls made here
   * @throws ItemNotFoundException declared; propagated from the calls made here
   * @throws SpottingException declared; propagated from the calls made here
   * @throws MalformedURLException declared by this method's signature
   * @throws BoilerpipeProcessingException declared by this method's signature
   */
  public Annotation getAnnotation(
      String text,
      double confidence,
      int support,
      String ontologyTypesString,
      String sparqlQuery,
      String policy,
      boolean coreferenceResolution,
      String spotterName,
      String disambiguatorName,
      String clientIp)
      throws SearchException, InputException, ItemNotFoundException, SpottingException,
          MalformedURLException, BoilerpipeProcessingException {

    LOG.info("******************************** Parameters ********************************");

    // Normalize the policy so that the log always shows one of the two canonical values.
    boolean blacklist = policy.trim().equalsIgnoreCase("blacklist");
    policy = blacklist ? "blacklist" : "whitelist";

    LOG.info("client ip: " + clientIp);
    LOG.info("text to be processed: " + text);
    LOG.info("text length in chars: " + text.length());
    LOG.info("confidence: " + confidence);
    LOG.info("support: " + support);
    LOG.info("types: " + ontologyTypesString);
    LOG.info("sparqlQuery: " + sparqlQuery);
    LOG.info("policy: " + policy);
    LOG.info("coreferenceResolution: " + coreferenceResolution);
    LOG.info("spotter: " + spotterName);
    LOG.info("disambiguator: " + disambiguatorName);

    /* Validating parameters */

    if (text.trim().equals("")) {
      throw new InputException("No text was specified in the &text parameter.");
    }

    List<OntologyType> ontologyTypes = new ArrayList<OntologyType>();
    String[] types = ontologyTypesString.trim().split(",");
    for (String t : types) {
      String typeName = t.trim();
      if (!typeName.equals("")) {
        ontologyTypes.add(Factory.ontologyType().fromQName(typeName));
      }
    }

    /* Setting defaults */

    // Compare by content: the previous `==` tested reference identity, so this switch only
    // fired when both strings happened to be the same object.
    if (Server.getTokenizer() == null
        && SpotlightConfiguration.DisambiguationPolicy.Default.name().equals(disambiguatorName)
        && text.length() > 1200) {
      disambiguatorName = SpotlightConfiguration.DisambiguationPolicy.Document.name();
      LOG.info(
          String.format(
              "Text length: %d. Using %s to disambiguate.", text.length(), disambiguatorName));
    }

    Spotter spotter = Server.getSpotter(spotterName);
    ParagraphDisambiguatorJ disambiguator = Server.getDisambiguator(disambiguatorName);

    /* Running Annotation */

    Annotation annotation =
        process(
            text,
            confidence,
            support,
            ontologyTypes,
            sparqlQuery,
            blacklist,
            coreferenceResolution,
            spotter,
            disambiguator);

    LOG.debug("Shown: " + annotation.toXML());
    LOG.debug("****************************************************************");

    return annotation;
  }