Esempio n. 1
0
  // Annotation interface
  /**
   * Builds the candidate annotation for {@code text}: spots entity mentions, asks the
   * disambiguator for the best {@code k} candidates of each mention, filters every candidate list
   * with an {@code OccsFilter}, and collects the mentions that still have candidates as spots.
   *
   * <p>Coreference resolution is always switched off for this request type; the caller's
   * {@code coreferenceResolution} value is ignored (see the comment in the body, issue #136).
   *
   * @param text the text to annotate
   * @param confidence attached to the text as the {@code "confidence"} score feature and passed
   *     to the filter
   * @param support passed to the filter
   * @param ontologyTypesString passed to the filter unchanged
   * @param sparqlQuery passed to the filter unchanged
   * @param blacklist passed to the filter unchanged
   * @param coreferenceResolution ignored; the filter is always built with {@code false}
   * @param spotter extracts the entity mentions from the text
   * @param disambiguator produces the candidate resources for the spotted mentions
   * @return the annotation for {@code text}; its spots are not set at all when the spotter finds
   *     no mention, otherwise they contain one spot per mention that kept at least one candidate
   *     after filtering
   * @throws SearchException propagated from the calls made here
   * @throws ItemNotFoundException propagated from the calls made here
   * @throws InputException propagated from the calls made here
   * @throws SpottingException propagated from the calls made here
   */
  public Annotation process(
      String text,
      double confidence,
      int support,
      String ontologyTypesString,
      String sparqlQuery,
      boolean blacklist,
      boolean coreferenceResolution,
      Spotter spotter,
      ParagraphDisambiguatorJ disambiguator)
      throws SearchException, ItemNotFoundException, InputException, SpottingException {

    Annotation annotation = new Annotation(text);

    Text textObject = new Text(text);
    textObject.setFeature(new Score("confidence", confidence));

    if (Server.getTokenizer() != null) {
      Server.getTokenizer().tokenizeMaybe(textObject);
    }

    List<SurfaceFormOccurrence> entityMentions = spotter.extract(textObject);
    if (entityMentions.isEmpty()) {
      return annotation; // nothing to disambiguate
    }
    Paragraph paragraph = Factory.paragraph().fromJ(entityMentions);
    LOG.info(String.format("Spotted %d entity mentions.", entityMentions.size()));

    Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> entityCandidates =
        disambiguator.bestK(paragraph, k);
    LOG.info(
        String.format(
            "Disambiguated %d candidates with %s.", entityCandidates.size(), disambiguator.name()));

    // Applying the coreference filter to Candidates requests (a feature of questionable
    // usefulness here) produced the error described in issue #136. To avoid it, coreference
    // resolution is always disabled for this request type, regardless of what the user asked for.
    boolean effectiveCoreferenceResolution = false;
    FilterElement filter =
        new OccsFilter(
            confidence,
            support,
            ontologyTypesString,
            sparqlQuery,
            blacklist,
            effectiveCoreferenceResolution,
            Server.getSimilarityThresholds(),
            Server.getSparqlExecute());

    // Keep only the mentions that still have at least one candidate after filtering.
    Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> filteredEntityCandidates =
        new HashMap<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>>();
    for (Map.Entry<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> entry :
        entityCandidates.entrySet()) {
      List<DBpediaResourceOccurrence> result =
          filter.accept(new FilterOccsImpl(), entry.getValue());
      if (!result.isEmpty()) {
        filteredEntityCandidates.put(entry.getKey(), result);
      }
    }

    // Convert every surviving mention and its candidates into the output model.
    List<Spot> spots = new LinkedList<Spot>();
    for (Map.Entry<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> entry :
        filteredEntityCandidates.entrySet()) {
      Spot spot = Spot.getInstance(entry.getKey());
      List<Resource> resources = new LinkedList<Resource>();
      for (DBpediaResourceOccurrence occ : entry.getValue()) {
        resources.add(Resource.getInstance(occ));
      }
      spot.setResources(resources);
      spots.add(spot);
    }
    annotation.setSpots(spots);
    return annotation;
  }
  /**
   * Builds the candidate annotation for {@code text} using a typed list of ontology types.
   *
   * <p>The text is spotted, the disambiguator is asked for the best {@code k} candidates per
   * mention, and the candidates are filtered only when {@code Server.getCombinedFilters()}
   * returns a non-null filter chain; otherwise every candidate is reported unfiltered.
   *
   * <p>NOTE(review): unlike the overload taking {@code ontologyTypesString}, this method does not
   * attach a {@code "confidence"} score feature to the text before spotting - confirm whether
   * that difference is intentional.
   *
   * @param text the text to annotate
   * @param confidence passed to the combined filters
   * @param support passed to the combined filters
   * @param ontologyTypes passed to the combined filters
   * @param sparqlQuery passed to the combined filters
   * @param blacklist selects the blacklist policy when {@code true}, the whitelist policy
   *     otherwise; the chosen policy is passed to the combined filters
   * @param coreferenceResolution passed to the combined filters
   * @param spotter extracts the entity mentions from the text
   * @param disambiguator produces the candidate resources for the spotted mentions
   * @return the annotation for {@code text}; its spots are not set at all when the spotter finds
   *     no mention, otherwise they contain one spot per entry of the (possibly filtered)
   *     candidate map
   * @throws SearchException propagated from the calls made here
   * @throws ItemNotFoundException propagated from the calls made here
   * @throws InputException propagated from the calls made here
   * @throws SpottingException propagated from the calls made here
   */
  public Annotation process(
      String text,
      double confidence,
      int support,
      List<OntologyType> ontologyTypes,
      String sparqlQuery,
      boolean blacklist,
      boolean coreferenceResolution,
      Spotter spotter,
      ParagraphDisambiguatorJ disambiguator)
      throws SearchException, ItemNotFoundException, InputException, SpottingException {

    Annotation result = new Annotation(text);
    List<Spot> collectedSpots = new LinkedList<Spot>();

    Text input = new Text(text);

    if (Server.getTokenizer() != null) {
      Server.getTokenizer().tokenizeMaybe(input);
    }

    List<SurfaceFormOccurrence> mentions = spotter.extract(input);
    if (mentions.isEmpty()) {
      // Nothing to disambiguate.
      return result;
    }

    Paragraph mentionParagraph = Factory.paragraph().fromJ(mentions);
    LOG.info(String.format("Spotted %d entity mentions.", mentions.size()));

    Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> candidates =
        disambiguator.bestK(mentionParagraph, k);
    LOG.info(
        String.format(
            "Disambiguated %d candidates with %s.", candidates.size(), disambiguator.name()));

    // Translate the boolean flag into the filter policy expected by the combined filters.
    Enumeration.Value policy;
    if (blacklist) {
      policy = FilterPolicy$.MODULE$.Blacklist();
    } else {
      policy = FilterPolicy$.MODULE$.Whitelist();
    }

    // Without a configured filter chain the candidates are passed through untouched.
    Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> survivors;
    if (Server.getCombinedFilters() == null) {
      survivors = candidates;
    } else {
      CombineAllAnnotationFilters chain = Server.getCombinedFilters();
      survivors =
          chain.filter(
              candidates,
              confidence,
              support,
              ontologyTypes,
              sparqlQuery,
              policy,
              coreferenceResolution);
    }

    // Convert every remaining mention and its candidates into the output model.
    for (Map.Entry<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> mention :
        survivors.entrySet()) {
      Spot spot = Spot.getInstance(mention.getKey());
      List<Resource> spotResources = new LinkedList<Resource>();
      for (DBpediaResourceOccurrence candidate : mention.getValue()) {
        spotResources.add(Resource.getInstance(candidate));
      }
      spot.setResources(spotResources);
      collectedSpots.add(spot);
    }

    result.setSpots(collectedSpots);
    return result;
  }