// Annotation interface public Annotation process( String text, double confidence, int support, String ontologyTypesString, String sparqlQuery, boolean blacklist, boolean coreferenceResolution, Spotter spotter, ParagraphDisambiguatorJ disambiguator) throws SearchException, ItemNotFoundException, InputException, SpottingException { Annotation annotation = new Annotation(text); List<Spot> spots = new LinkedList<Spot>(); Text textObject = new Text(text); textObject.setFeature(new Score("confidence", confidence)); if (Server.getTokenizer() != null) Server.getTokenizer().tokenizeMaybe(textObject); List<SurfaceFormOccurrence> entityMentions = spotter.extract(textObject); if (entityMentions.size() == 0) return annotation; // nothing to disambiguate Paragraph paragraph = Factory.paragraph().fromJ(entityMentions); LOG.info(String.format("Spotted %d entity mentions.", entityMentions.size())); Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> entityCandidates = disambiguator.bestK(paragraph, k); LOG.info( String.format( "Disambiguated %d candidates with %s.", entityCandidates.size(), disambiguator.name())); Enumeration.Value listColor = blacklist ? FilterPolicy$.MODULE$.Blacklist() : FilterPolicy$.MODULE$.Whitelist(); /*The previous addition of filter to the Candidates requests (which has usability questioned) produce the error described at issue #136. To solve it, this feature for this argument (Candidates) is disabled, setting coreferenceResolution to false ever. Ignoring the user's configuration. */ Boolean unableCoreferenceResolution = false; FilterElement filter = new OccsFilter( confidence, support, ontologyTypesString, sparqlQuery, blacklist, unableCoreferenceResolution, Server.getSimilarityThresholds(), Server.getSparqlExecute()); Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> filteredEntityCandidates = new HashMap<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>>(); ; for (Map.Entry<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> entry : entityCandidates.entrySet()) { List<DBpediaResourceOccurrence> result = filter.accept(new FilterOccsImpl(), entry.getValue()); if (!result.isEmpty()) filteredEntityCandidates.put(entry.getKey(), result); } for (SurfaceFormOccurrence sfOcc : filteredEntityCandidates.keySet()) { Spot spot = Spot.getInstance(sfOcc); List<Resource> resources = new LinkedList<Resource>(); for (DBpediaResourceOccurrence occ : filteredEntityCandidates.get(sfOcc)) { Resource resource = Resource.getInstance(occ); resources.add(resource); } spot.setResources(resources); spots.add(spot); } annotation.setSpots(spots); return annotation; }
/** TODO Does not do any filtering at the moment!!! */ public Annotation process( String text, double confidence, int support, List<OntologyType> ontologyTypes, String sparqlQuery, boolean blacklist, boolean coreferenceResolution, Spotter spotter, ParagraphDisambiguatorJ disambiguator) throws SearchException, ItemNotFoundException, InputException, SpottingException { Annotation annotation = new Annotation(text); List<Spot> spots = new LinkedList<Spot>(); Text textObject = new Text(text); if (Server.getTokenizer() != null) Server.getTokenizer().tokenizeMaybe(textObject); List<SurfaceFormOccurrence> entityMentions = spotter.extract(textObject); if (entityMentions.size() == 0) return annotation; // nothing to di // sambiguate Paragraph paragraph = Factory.paragraph().fromJ(entityMentions); LOG.info(String.format("Spotted %d entity mentions.", entityMentions.size())); Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> entityCandidates = disambiguator.bestK(paragraph, k); LOG.info( String.format( "Disambiguated %d candidates with %s.", entityCandidates.size(), disambiguator.name())); Enumeration.Value listColor = blacklist ? FilterPolicy$.MODULE$.Blacklist() : FilterPolicy$.MODULE$.Whitelist(); Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> filteredEntityCandidates = entityCandidates; if (Server.getCombinedFilters() != null) { CombineAllAnnotationFilters filters = Server.getCombinedFilters(); filteredEntityCandidates = filters.filter( entityCandidates, confidence, support, ontologyTypes, sparqlQuery, listColor, coreferenceResolution); } for (SurfaceFormOccurrence sfOcc : filteredEntityCandidates.keySet()) { Spot spot = Spot.getInstance(sfOcc); List<Resource> resources = new LinkedList<Resource>(); for (DBpediaResourceOccurrence occ : filteredEntityCandidates.get(sfOcc)) { Resource resource = Resource.getInstance(occ); resources.add(resource); } spot.setResources(resources); spots.add(spot); } annotation.setSpots(spots); return annotation; }