/** * this method finds potential options based on the detected main subject; it is usually used when * none of the ontology-related annotations exist * * @param key * @return */ public List<Vote> generateGenericVotes( SuggestionKey key, POC poc, boolean addNone, Integer max, List<SemanticConcept> toSkip) { Set<String> avoidThese = new HashSet<String>(); if (toSkip != null) for (SemanticConcept concept : toSkip) { String uri = concept.getOntologyElement().getData().toString(); avoidThese.add(uri); } String text = key.getText(); List<Vote> votes = new ArrayList<Vote>(); List<OntologyElement> elements; try { elements = findGenericOntologyElements(max); for (OntologyElement element : elements) { if (avoidThese.contains(element.getData().toString())) continue; element.setAnnotation(poc.getAnnotation()); // run some similarity metric here and use some threshold... // if similarityScore>threshold then add this to the // clarificationOptions String pPropertyShortName = ""; String suggestion = ((SerializableURI) element.getData()).toString(); try { SerializableURI elementUri = new SerializableURI(suggestion, false); pPropertyShortName = elementUri.getResourceName(); } catch (Exception e) { pPropertyShortName = suggestion; } String niceLabel = StringUtil.beautifyString(pPropertyShortName); Vote vote = new Vote(); long id = incrementer.incrementAndGet(); vote.setId(id); SemanticConcept candidateSemanticConcept = new SemanticConcept(); candidateSemanticConcept.setOntologyElement(element); SemanticConcept clonedConcept = (SemanticConcept) candidateSemanticConcept.clone(); vote.setCandidate(clonedConcept); double totalSimilarity = similarityCalculator.findSimilarity(text, niceLabel); vote.setVote(totalSimilarity); votes.add(vote); votes.addAll(addAdditionalVotes(element, poc, text, pPropertyShortName, false)); } // here we add datatypeProperties with no domain etc. votes.addAll( generateVotesFromOntologyElements(findHangingElements(), avoidThese, poc, null, text)); } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } // add None element if (addNone) { Vote vote = VoteGenerator.generateNoneVote(incrementer.incrementAndGet()); vote.getCandidate().getOntologyElement().setAnnotation(poc.getAnnotation()); votes.add(vote); } return votes; }
/** * Finds clarification options for given interpretation by applying rules such as: match head with * some existing (datatype)property of the OntoRes which is first in the query (if there is such * OntoRes) what is city population of california? head: population modf: city unified two methods * on 27.02.2010. what is the largest city in california? (city is governor, largest is dependent) * s: is 'largest' related to: <list od datatype properties which are related to cities (in * california)> * * @param key * @return */ public List<Vote> generateVotes( SuggestionKey key, POC poc, boolean addNone, List<SemanticConcept> toSkip) { logger.debug("Before generating suggestions, this many needs to be skipped: " + toSkip.size()); long start = System.currentTimeMillis(); Set<String> avoidThese = new HashSet<String>(); if (toSkip != null) for (SemanticConcept concept : toSkip) { String uri = concept.getOntologyElement().getData().toString(); avoidThese.add(uri); } long end = System.currentTimeMillis(); List<Vote> votes = new ArrayList<Vote>(); logger.info("Moved OCs to the list to avoid for " + (end - start) + "ms."); List<SemanticConcept> neighbours = key.getNearestNeighbours(); String text = key.getText(); Integer num = null; if (neighbours != null) num = neighbours.size(); logger.info("Found " + num + " neighbours for " + key.getText()); /* * this list ensures that the suggestions with an URI is added only once as if there are more than one * NNeighbours it can happen that the same suggestion is generated for each neighbour */ Set<OntologyElement> allCandidateElements = new HashSet<OntologyElement>(); for (SemanticConcept neighbour : neighbours) { start = System.currentTimeMillis(); Set<OntologyElement> candidateElements = new HashSet<OntologyElement>(); List<OntologyElement> tmpCandidates; try { tmpCandidates = findCandidates(neighbour, text); end = System.currentTimeMillis(); logger.info("Finding candidates for:" + (end - start) + "ms"); start = System.currentTimeMillis(); for (OntologyElement el : tmpCandidates) { boolean elementAlreadyInSuggestions = elementAlreadyInSuggestions(new ArrayList(allCandidateElements), el); if (!elementAlreadyInSuggestions) { candidateElements.add(el); allCandidateElements.add(el); } { logger.debug("Skipping, element already added:" + el.getData().toString()); } } } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } end = System.currentTimeMillis(); logger.info("Checking whether candidates already added for:" + (end - start) + "ms"); logger.info( "Found " + candidateElements.size() + " suggestions (function elements not counted) for NN:" + neighbour.getOntologyElement().getData().toString() + " and text:" + text + ", total up to now:" + allCandidateElements.size() + ", total to remove: " + avoidThese.size()); start = System.currentTimeMillis(); try { votes.addAll( generateVotesFromOntologyElements( new ArrayList(candidateElements), avoidThese, poc, neighbour, text)); } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } end = System.currentTimeMillis(); logger.info( "Generating Ontology Elements for:" + (end - start) + "ms, " + votes.size() + " suggestions to be shown."); // //////////////////////////////////////////////////// // ////////add the closest element if the poc to be clarified is // adjective // ////////////////////////////////////////////////// OntologyElement closestOntologyConcept = neighbour.getOntologyElement(); // if (!candidateElements.contains(closestOntologyConcept)) { boolean elementAlreadyInSuggestions = elementAlreadyInSuggestions(new ArrayList(candidateElements), closestOntologyConcept); if (!elementAlreadyInSuggestions) { SerializableURI uriUri = null; String cocSuggestion = closestOntologyConcept.getData().toString(); String cocUriOrLiteral = null; try { uriUri = new SerializableURI(cocSuggestion, false); cocUriOrLiteral = uriUri.getResourceName(); } catch (Exception e) { cocUriOrLiteral = cocSuggestion; } votes.addAll(addAdditionalVotes(closestOntologyConcept, poc, text, cocUriOrLiteral, true)); } } start = System.currentTimeMillis(); // here we add datatypeProperties with no domain etc. List<OntologyElement> hangingElements = new ArrayList<OntologyElement>(); try { hangingElements.addAll(findHangingElements()); } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } List filteredHangingElements = new ArrayList(); for (OntologyElement el : hangingElements) { boolean elementAlreadyInSuggestions = elementAlreadyInSuggestions(new ArrayList(allCandidateElements), el); if (!elementAlreadyInSuggestions) filteredHangingElements.add(el); } try { votes.addAll( generateVotesFromOntologyElements(filteredHangingElements, avoidThese, poc, null, text)); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } end = System.currentTimeMillis(); logger.info("Added hanging Elements for:" + (end - start) + "ms"); // add None element if (addNone) { Vote vote = VoteGenerator.generateNoneVote(incrementer.incrementAndGet()); vote.getCandidate().getOntologyElement().setAnnotation(poc.getAnnotation()); votes.add(vote); } // now call wordnet to recalculate scores return votes; }