public Annotation getAnnotation( String text, double confidence, int support, String ontologyTypesString, String sparqlQuery, String policy, boolean coreferenceResolution, String spotterName, String disambiguatorName, String clientIp) throws SearchException, InputException, ItemNotFoundException, SpottingException, MalformedURLException, BoilerpipeProcessingException { LOG.info("******************************** Parameters ********************************"); // announceAPI(); boolean blacklist = false; if (policy.trim().equalsIgnoreCase("blacklist")) { blacklist = true; policy = "blacklist"; } else { policy = "whitelist"; } LOG.info("client ip: " + clientIp); LOG.info("text to be processed: " + text); LOG.info("text length in chars: " + text.length()); LOG.info("confidence: " + String.valueOf(confidence)); LOG.info("support: " + String.valueOf(support)); LOG.info("types: " + ontologyTypesString); LOG.info("sparqlQuery: " + sparqlQuery); LOG.info("policy: " + policy); LOG.info("coreferenceResolution: " + String.valueOf(coreferenceResolution)); LOG.info("spotter: " + spotterName); LOG.info("disambiguator: " + disambiguatorName); /* Validating parameters */ if (text.trim().equals("")) { throw new InputException("No text was specified in the &text parameter."); } /* Setting defaults */ if (Server.getTokenizer() == null && disambiguatorName == SpotlightConfiguration.DisambiguationPolicy.Default.name() && text.length() > 1200) { disambiguatorName = SpotlightConfiguration.DisambiguationPolicy.Document.name(); LOG.info( String.format( "Text length: %d. Using %s to disambiguate.", text.length(), disambiguatorName)); } Spotter spotter = Server.getSpotter(spotterName); ParagraphDisambiguatorJ disambiguator = Server.getDisambiguator(disambiguatorName); /* Running Annotation */ Annotation annotation = process( text, confidence, support, ontologyTypesString, sparqlQuery, blacklist, coreferenceResolution, spotter, disambiguator); LOG.debug("Shown: " + annotation.toXML()); LOG.debug("****************************************************************"); return annotation; }
// Annotation interface public Annotation process( String text, double confidence, int support, String ontologyTypesString, String sparqlQuery, boolean blacklist, boolean coreferenceResolution, Spotter spotter, ParagraphDisambiguatorJ disambiguator) throws SearchException, ItemNotFoundException, InputException, SpottingException { Annotation annotation = new Annotation(text); List<Spot> spots = new LinkedList<Spot>(); Text textObject = new Text(text); textObject.setFeature(new Score("confidence", confidence)); if (Server.getTokenizer() != null) Server.getTokenizer().tokenizeMaybe(textObject); List<SurfaceFormOccurrence> entityMentions = spotter.extract(textObject); if (entityMentions.size() == 0) return annotation; // nothing to disambiguate Paragraph paragraph = Factory.paragraph().fromJ(entityMentions); LOG.info(String.format("Spotted %d entity mentions.", entityMentions.size())); Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> entityCandidates = disambiguator.bestK(paragraph, k); LOG.info( String.format( "Disambiguated %d candidates with %s.", entityCandidates.size(), disambiguator.name())); Enumeration.Value listColor = blacklist ? FilterPolicy$.MODULE$.Blacklist() : FilterPolicy$.MODULE$.Whitelist(); /*The previous addition of filter to the Candidates requests (which has usability questioned) produce the error described at issue #136. To solve it, this feature for this argument (Candidates) is disabled, setting coreferenceResolution to false ever. Ignoring the user's configuration. */ Boolean unableCoreferenceResolution = false; FilterElement filter = new OccsFilter( confidence, support, ontologyTypesString, sparqlQuery, blacklist, unableCoreferenceResolution, Server.getSimilarityThresholds(), Server.getSparqlExecute()); Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> filteredEntityCandidates = new HashMap<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>>(); ; for (Map.Entry<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> entry : entityCandidates.entrySet()) { List<DBpediaResourceOccurrence> result = filter.accept(new FilterOccsImpl(), entry.getValue()); if (!result.isEmpty()) filteredEntityCandidates.put(entry.getKey(), result); } for (SurfaceFormOccurrence sfOcc : filteredEntityCandidates.keySet()) { Spot spot = Spot.getInstance(sfOcc); List<Resource> resources = new LinkedList<Resource>(); for (DBpediaResourceOccurrence occ : filteredEntityCandidates.get(sfOcc)) { Resource resource = Resource.getInstance(occ); resources.add(resource); } spot.setResources(resources); spots.add(spot); } annotation.setSpots(spots); return annotation; }
/** TODO Does not do any filtering at the moment!!! */ public Annotation process( String text, double confidence, int support, List<OntologyType> ontologyTypes, String sparqlQuery, boolean blacklist, boolean coreferenceResolution, Spotter spotter, ParagraphDisambiguatorJ disambiguator) throws SearchException, ItemNotFoundException, InputException, SpottingException { Annotation annotation = new Annotation(text); List<Spot> spots = new LinkedList<Spot>(); Text textObject = new Text(text); if (Server.getTokenizer() != null) Server.getTokenizer().tokenizeMaybe(textObject); List<SurfaceFormOccurrence> entityMentions = spotter.extract(textObject); if (entityMentions.size() == 0) return annotation; // nothing to di // sambiguate Paragraph paragraph = Factory.paragraph().fromJ(entityMentions); LOG.info(String.format("Spotted %d entity mentions.", entityMentions.size())); Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> entityCandidates = disambiguator.bestK(paragraph, k); LOG.info( String.format( "Disambiguated %d candidates with %s.", entityCandidates.size(), disambiguator.name())); Enumeration.Value listColor = blacklist ? FilterPolicy$.MODULE$.Blacklist() : FilterPolicy$.MODULE$.Whitelist(); Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> filteredEntityCandidates = entityCandidates; if (Server.getCombinedFilters() != null) { CombineAllAnnotationFilters filters = Server.getCombinedFilters(); filteredEntityCandidates = filters.filter( entityCandidates, confidence, support, ontologyTypes, sparqlQuery, listColor, coreferenceResolution); } for (SurfaceFormOccurrence sfOcc : filteredEntityCandidates.keySet()) { Spot spot = Spot.getInstance(sfOcc); List<Resource> resources = new LinkedList<Resource>(); for (DBpediaResourceOccurrence occ : filteredEntityCandidates.get(sfOcc)) { Resource resource = Resource.getInstance(occ); resources.add(resource); } spot.setResources(resources); spots.add(spot); } annotation.setSpots(spots); return annotation; }