/** * this method finds potential options based on the detected main subject; it is usually used when * none of the ontology-related annotations exist * * @param key * @return */ public List<Vote> generateGenericVotes( SuggestionKey key, POC poc, boolean addNone, Integer max, List<SemanticConcept> toSkip) { Set<String> avoidThese = new HashSet<String>(); if (toSkip != null) for (SemanticConcept concept : toSkip) { String uri = concept.getOntologyElement().getData().toString(); avoidThese.add(uri); } String text = key.getText(); List<Vote> votes = new ArrayList<Vote>(); List<OntologyElement> elements; try { elements = findGenericOntologyElements(max); for (OntologyElement element : elements) { if (avoidThese.contains(element.getData().toString())) continue; element.setAnnotation(poc.getAnnotation()); // run some similarity metric here and use some threshold... // if similarityScore>threshold then add this to the // clarificationOptions String pPropertyShortName = ""; String suggestion = ((SerializableURI) element.getData()).toString(); try { SerializableURI elementUri = new SerializableURI(suggestion, false); pPropertyShortName = elementUri.getResourceName(); } catch (Exception e) { pPropertyShortName = suggestion; } String niceLabel = StringUtil.beautifyString(pPropertyShortName); Vote vote = new Vote(); long id = incrementer.incrementAndGet(); vote.setId(id); SemanticConcept candidateSemanticConcept = new SemanticConcept(); candidateSemanticConcept.setOntologyElement(element); SemanticConcept clonedConcept = (SemanticConcept) candidateSemanticConcept.clone(); vote.setCandidate(clonedConcept); double totalSimilarity = similarityCalculator.findSimilarity(text, niceLabel); vote.setVote(totalSimilarity); votes.add(vote); votes.addAll(addAdditionalVotes(element, poc, text, pPropertyShortName, false)); } // here we add datatypeProperties with no domain etc. 
votes.addAll( generateVotesFromOntologyElements(findHangingElements(), avoidThese, poc, null, text)); } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } // add None element if (addNone) { Vote vote = VoteGenerator.generateNoneVote(incrementer.incrementAndGet()); vote.getCandidate().getOntologyElement().setAnnotation(poc.getAnnotation()); votes.add(vote); } return votes; }
/**
 * Builds one {@code ClassElement} per class URI.
 *
 * <p>A URI for which {@code SerializableURI} cannot be constructed is reported and skipped, so
 * the result never contains an element with {@code null} data (callers in this file call
 * {@code getData().toString()} on every element).
 *
 * @param classes class URIs; {@code null} is treated as empty, matching
 *     {@code returnPropertyElements}
 * @return a new list of class elements, never {@code null}
 * @throws Exception declared for compatibility with existing callers
 */
List<OntologyElement> returnClassElements(Set<String> classes) throws Exception {
  List<OntologyElement> elements = new ArrayList<OntologyElement>();
  if (classes == null) {
    return elements;
  }
  for (String euri : classes) {
    SerializableURI elementUri;
    try {
      elementUri = new SerializableURI(euri, false);
    } catch (Exception e) {
      // NOTE(review): consider routing this through the class logger.
      e.printStackTrace();
      continue;
    }
    OntologyElement element = new ClassElement();
    element.setData(elementUri);
    elements.add(element);
  }
  return elements;
}
/**
 * Checks whether an element is already present among the candidate elements.
 *
 * <p>An element counts as present when some candidate has non-null data whose string form equals
 * the string form of the given element's data, or when both the candidate and the given element
 * are {@code DatatypePropertyValueElement}s whose identifiers are equal.
 *
 * @param candidateElements elements collected so far; {@code null} is treated as empty
 * @param closestOntologyConcept the element to look for
 * @return {@code true} as soon as a matching candidate is found, {@code false} otherwise
 */
boolean elementAlreadyInSuggestions(
    List<OntologyElement> candidateElements, OntologyElement closestOntologyConcept) {
  if (candidateElements == null || closestOntologyConcept == null) {
    return false;
  }
  Object targetData = closestOntologyConcept.getData();
  // null when the target has no data; String.equals(null) is false, so no candidate can match
  // on the string comparison in that case
  String targetText = targetData == null ? null : targetData.toString();
  boolean targetIsValueElement = closestOntologyConcept instanceof DatatypePropertyValueElement;

  for (OntologyElement el : candidateElements) {
    Object data = el.getData();
    if (data != null && data.toString().equals(targetText)) {
      return true;
    }
    if (targetIsValueElement && el instanceof DatatypePropertyValueElement) {
      DatatypePropertyValueIdentifier identif1 = (DatatypePropertyValueIdentifier) data;
      DatatypePropertyValueIdentifier identif2 = (DatatypePropertyValueIdentifier) targetData;
      if (identif1 != null && identif1.equals(identif2)) {
        return true;
      }
    }
  }
  return false;
}
/** * finds elements which are the top most in the ontology... * * @return */ private List<OntologyElement> findGenericOntologyElements(Integer max) throws Exception { List<OntologyElement> elements = new ArrayList<OntologyElement>(); Set<String> suggestions = luceneAnnotator.findPropertyURIs(max); // now remove all that are after max List<String> list = new ArrayList<String>(suggestions); if (suggestions != null && suggestions.size() > max) { for (int i = max; i < suggestions.size(); i++) { String prop = list.get(i); suggestions.remove(prop); } } elements.addAll(returnPropertyElements(suggestions)); long start = System.currentTimeMillis(); Set<String> classUris = luceneAnnotator.findTopClasses(); long end = System.currentTimeMillis(); logger.info("Finished searching lucene for top classes for:" + (end - start) + "ms."); if (classUris == null) classUris = new HashSet(); logger.info("Found " + classUris.size() + " top classes."); // now remove all that are after max List<String> newList = new ArrayList<String>(classUris); if (newList.size() > max) { for (int i = max; i < newList.size(); i++) { String classUri = newList.get(i); classUris.remove(classUri); } } for (String euri : classUris) { SerializableURI elementUri = null; try { elementUri = new SerializableURI(euri, false); } catch (Exception e) { e.printStackTrace(); } OntologyElement e = new ClassElement(); e.setData(elementUri); elements.add(e); } return elements; }
/** * if element is datatype property then add min, max and sum as options * * @param element * @param poc * @return */ List<Vote> addAdditionalVotes( OntologyElement element, POC poc, String text, String propertyUri, boolean alreadyAdded) { // if element is datatype property add suggestions with min, // max, sum List<Vote> votes = new ArrayList<Vote>(); // logger.info("Adding additional votes(min,max,etc), poc is:"+poc.toString()); if (element instanceof PropertyElement) { PropertyElement clonedElement = new PropertyElement(); String thisPropertyUri = ((PropertyElement) element).getData().toString(); // logger.info("thisPropertyUri "+thisPropertyUri); if (luceneAnnotator.isItDatatypeProperty(thisPropertyUri)) { ((PropertyElement) clonedElement).setDatatypeProperty(true); boolean containsJJ = false; containsJJ = treeUtils.pocContainsJJs(poc); // logger.info("Checking whether it contains JJ*..." + containsJJ); if (containsJJ) { if (alreadyAdded) clonedElement.setAlreadyAdded(true); else clonedElement.setAlreadyAdded(false); clonedElement.setAnnotation(element.getAnnotation()); clonedElement.setData(element.getData()); if (((PropertyElement) element).getRange() != null) clonedElement.setRange(((PropertyElement) element).getRange()); if (((PropertyElement) element).getDomain() != null) clonedElement.setDomain(((PropertyElement) element).getDomain()); if (((PropertyElement) element).getGovernor() != null) clonedElement.setGovernor(((PropertyElement) element).getGovernor()); Vote vote = generateVote(text, propertyUri, clonedElement, FreyaConstants.MAX_FUNCTION); votes.add(vote); vote = generateVote(text, propertyUri, clonedElement, FreyaConstants.MIN_FUNCTION); votes.add(vote); vote = generateVote(text, propertyUri, clonedElement, FreyaConstants.SUM_FUNCTION); votes.add(vote); vote = generateVote(text, propertyUri, clonedElement, FreyaConstants.AVG_FUNCTION); votes.add(vote); } } } return votes; }
/** * @param candidateElements * @param avoidThese * @param poc * @param neighbour * @param text * @return */ List<Vote> generateVotesFromOntologyElements( List<OntologyElement> candidateElements, Set<String> avoidThese, POC poc, SemanticConcept neighbour, String text) throws Exception { List<Vote> votes = new ArrayList<Vote>(); List<String> listOfSuggestionUris = new ArrayList<String>(); List<DatatypePropertyValueIdentifier> listOfDTPVIdentifiers = new ArrayList<DatatypePropertyValueIdentifier>(); for (OntologyElement element : candidateElements) { // skip adding suggestions which are already in ocs if (avoidThese.contains(element.getData().toString())) continue; // run some similarity metric here and use some threshold... // if similarityScore>threshold then add this to the // clarificationOptions String pPropertyShortNameOrUri = null; String suggestion = ((SerializableURI) element.getData()).toString(); try { SerializableURI elementUri = new SerializableURI(suggestion, false); pPropertyShortNameOrUri = elementUri.getResourceName(); } catch (Exception e) { pPropertyShortNameOrUri = suggestion; } if (!listOfSuggestionUris.contains(suggestion)) { listOfSuggestionUris.add(suggestion); if (poc.getAnnotation() != null) element.setAnnotation(poc.getAnnotation()); // check whether the element is datatype property and add // the governor if (neighbour != null) element = addGovernor(element, neighbour.getOntologyElement()); /* cloning elements */ Vote vote = generateVote(text, pPropertyShortNameOrUri, element, null); votes.add(vote); votes.addAll(addAdditionalVotes(element, poc, text, pPropertyShortNameOrUri, false)); } } return votes; }
/** * generate property elements from the set of property uris, and check whether these properties * are datatype so that additional flag can be added to the property elements * * @param classes * @return */ List<OntologyElement> returnPropertyElements(Set<String> properties) throws Exception { if (properties == null) return new ArrayList(); List<OntologyElement> elements = new ArrayList<OntologyElement>(); for (String euri : properties) { // logger.info("Checking if datatypeproperty=true for: " + euri); SerializableURI elementUri = null; try { elementUri = new SerializableURI(euri, false); } catch (Exception e) { e.printStackTrace(); } OntologyElement e = new PropertyElement(); // so that later we can distinguish property elements if they refer // to datatypes if (luceneAnnotator.isItDatatypeProperty(euri)) { ((PropertyElement) e).setDatatypeProperty(true); } e.setData(elementUri); elements.add(((PropertyElement) e)); } return elements; }
/** * Finds clarification options for given interpretation by applying rules such as: match head with * some existing (datatype)property of the OntoRes which is first in the query (if there is such * OntoRes) what is city population of california? head: population modf: city unified two methods * on 27.02.2010. what is the largest city in california? (city is governor, largest is dependent) * s: is 'largest' related to: <list od datatype properties which are related to cities (in * california)> * * @param key * @return */ public List<Vote> generateVotes( SuggestionKey key, POC poc, boolean addNone, List<SemanticConcept> toSkip) { logger.debug("Before generating suggestions, this many needs to be skipped: " + toSkip.size()); long start = System.currentTimeMillis(); Set<String> avoidThese = new HashSet<String>(); if (toSkip != null) for (SemanticConcept concept : toSkip) { String uri = concept.getOntologyElement().getData().toString(); avoidThese.add(uri); } long end = System.currentTimeMillis(); List<Vote> votes = new ArrayList<Vote>(); logger.info("Moved OCs to the list to avoid for " + (end - start) + "ms."); List<SemanticConcept> neighbours = key.getNearestNeighbours(); String text = key.getText(); Integer num = null; if (neighbours != null) num = neighbours.size(); logger.info("Found " + num + " neighbours for " + key.getText()); /* * this list ensures that the suggestions with an URI is added only once as if there are more than one * NNeighbours it can happen that the same suggestion is generated for each neighbour */ Set<OntologyElement> allCandidateElements = new HashSet<OntologyElement>(); for (SemanticConcept neighbour : neighbours) { start = System.currentTimeMillis(); Set<OntologyElement> candidateElements = new HashSet<OntologyElement>(); List<OntologyElement> tmpCandidates; try { tmpCandidates = findCandidates(neighbour, text); end = System.currentTimeMillis(); logger.info("Finding candidates for:" + (end - start) + "ms"); start = 
System.currentTimeMillis(); for (OntologyElement el : tmpCandidates) { boolean elementAlreadyInSuggestions = elementAlreadyInSuggestions(new ArrayList(allCandidateElements), el); if (!elementAlreadyInSuggestions) { candidateElements.add(el); allCandidateElements.add(el); } { logger.debug("Skipping, element already added:" + el.getData().toString()); } } } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } end = System.currentTimeMillis(); logger.info("Checking whether candidates already added for:" + (end - start) + "ms"); logger.info( "Found " + candidateElements.size() + " suggestions (function elements not counted) for NN:" + neighbour.getOntologyElement().getData().toString() + " and text:" + text + ", total up to now:" + allCandidateElements.size() + ", total to remove: " + avoidThese.size()); start = System.currentTimeMillis(); try { votes.addAll( generateVotesFromOntologyElements( new ArrayList(candidateElements), avoidThese, poc, neighbour, text)); } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } end = System.currentTimeMillis(); logger.info( "Generating Ontology Elements for:" + (end - start) + "ms, " + votes.size() + " suggestions to be shown."); // //////////////////////////////////////////////////// // ////////add the closest element if the poc to be clarified is // adjective // ////////////////////////////////////////////////// OntologyElement closestOntologyConcept = neighbour.getOntologyElement(); // if (!candidateElements.contains(closestOntologyConcept)) { boolean elementAlreadyInSuggestions = elementAlreadyInSuggestions(new ArrayList(candidateElements), closestOntologyConcept); if (!elementAlreadyInSuggestions) { SerializableURI uriUri = null; String cocSuggestion = closestOntologyConcept.getData().toString(); String cocUriOrLiteral = null; try { uriUri = new SerializableURI(cocSuggestion, false); cocUriOrLiteral = uriUri.getResourceName(); } catch (Exception e) { cocUriOrLiteral = 
cocSuggestion; } votes.addAll(addAdditionalVotes(closestOntologyConcept, poc, text, cocUriOrLiteral, true)); } } start = System.currentTimeMillis(); // here we add datatypeProperties with no domain etc. List<OntologyElement> hangingElements = new ArrayList<OntologyElement>(); try { hangingElements.addAll(findHangingElements()); } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } List filteredHangingElements = new ArrayList(); for (OntologyElement el : hangingElements) { boolean elementAlreadyInSuggestions = elementAlreadyInSuggestions(new ArrayList(allCandidateElements), el); if (!elementAlreadyInSuggestions) filteredHangingElements.add(el); } try { votes.addAll( generateVotesFromOntologyElements(filteredHangingElements, avoidThese, poc, null, text)); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } end = System.currentTimeMillis(); logger.info("Added hanging Elements for:" + (end - start) + "ms"); // add None element if (addNone) { Vote vote = VoteGenerator.generateNoneVote(incrementer.incrementAndGet()); vote.getCandidate().getOntologyElement().setAnnotation(poc.getAnnotation()); votes.add(vote); } // now call wordnet to recalculate scores return votes; }
/** * find candidate ontology elements * * @return */ private List<OntologyElement> findCandidates(SemanticConcept sc, String text) throws Exception { // Set<String> allSuggestions = new HashSet<String>(); OntologyElement el = sc.getOntologyElement(); // what is the population of california? // what is the population of cities in california? // what is the highest point of california? // String uri = null; Set<String> classUris = new HashSet<String>(); if (el instanceof InstanceElement) { List<String> allClassUris = ((InstanceElement) el).getClassURIList(); logger.info("There were:" + allClassUris.size() + " direct types"); // classUris = filterDirectTypes(allClassUris); classUris = new HashSet(allClassUris); logger.info("There is NO FILTERING of direct types..."); } else if (el instanceof InstanceListElement) { List allClassUris = ((InstanceListElement) el).getClassURIList(); logger.info("There were:" + allClassUris.size() + " direct types"); // classUris = filterDirectTypes(allClassUris); classUris = new HashSet(allClassUris); logger.info("There is NO FILTERING of direct types..."); } else if (el instanceof ClassElement) { String uri = ((SerializableURI) el.getData()).toString(); classUris.add(uri); logger.info("NN is class" + uri); } else if (el instanceof DatatypePropertyValueElement) { logger.info("NN is DPVE " + el.getData()); return findCandidatesForADTPV(((DatatypePropertyValueElement) el), text); } else if (el instanceof PropertyElement) { logger.info("NN is property " + el.getData()); return findCandidatesForAProperty(((PropertyElement) el), text); } Set<OntologyElement> elements = new HashSet<OntologyElement>(); Set<String> listOfPotentialCandidates = new HashSet<String>(); Set<String> classes = new HashSet<String>(); long start = System.currentTimeMillis(); for (String uri : classUris) { // //////////properties first////////////////////////// listOfPotentialCandidates = findCandidatesForClass(uri); 
elements.addAll(returnPropertyElements(listOfPotentialCandidates)); // allSuggestions.addAll(listOfPotentialCandidates); } logger.info("Found " + elements.size() + " elements so far."); // //////////////////////////////////// for (String uri : classUris) { classes.addAll( luceneAnnotator.getNeighbouringClassesWhereGivenClassIsADomain(uri, forceSuperClasses)); logger.info("Found " + classes.size() + " class candidates from IsADomain method."); classes.addAll( luceneAnnotator.getNeighbouringClassesWhereGivenClassIsARange(uri, forceSuperClasses)); logger.info( "Found more, now total: " + classes.size() + " class candidates from IsADomain and isARange method."); } Set<String> filteredClasses = new HashSet<String>(); for (String t : classes) { if (!isInIgnoreNameSpaceList(t)) { filteredClasses.add(t); } } elements.addAll(returnClassElements(filteredClasses)); long end = System.currentTimeMillis(); logger.info( "Found " + elements.size() + " cadidates for " + classUris.size() + " classes for " + (end - start) + "ms."); return new ArrayList(elements); }